LLVM 12.0.0
PPCISelLowering.cpp
Go to the documentation of this file.
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
15#include "PPC.h"
16#include "PPCCCState.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/None.h"
30#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SmallSet.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
85#include "llvm/Support/Debug.h"
87#include "llvm/Support/Format.h"
94#include <algorithm>
95#include <cassert>
96#include <cstdint>
97#include <iterator>
98#include <list>
99#include <utility>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "ppc-lowering"
105
// Hidden (cl::Hidden) boolean command-line switches for PPC lowering. Each
// one pairs a flag name with the cl::desc text that summarizes its effect.
106static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108
109static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisableSCO("disable-ppc-sco",
116cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117
118static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120
121static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123
124// TODO - Remove this option once soft fp128 is fully supported.
// Read further down together with Subtarget.hasAltivec(), on the path taken
// when Subtarget.hasP9Vector() is false; when both hold, f128 is registered
// in the VRRC register class.
125static cl::opt<bool>
126 EnableSoftFP128("enable-soft-fp128",
127 cl::desc("temp option to enable soft fp128"), cl::Hidden);
128
// Counters declared through the STATISTIC macro; each entry pairs a counter
// name with the human-readable description used when it is reported.
129STATISTIC(NumTailCalls, "Number of tail calls");
130STATISTIC(NumSiblingCalls, "Number of sibling calls");
131STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
132STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
133
// Forward declarations of file-local (static) helpers. Only the prototypes
// appear here; the definitions are not part of this section of the file.
134static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
135
136static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
137
138// FIXME: Remove this once the bug has been fixed!
140
142 const PPCSubtarget &STI)
143 : TargetLowering(TM), Subtarget(STI) {
144 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
145 // arguments are at least 4/8 bytes aligned.
146 bool isPPC64 = Subtarget.isPPC64();
147 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
148
149 // Set up the register classes.
150 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
151 if (!useSoftFloat()) {
152 if (hasSPE()) {
153 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
154 // EFPU2 APU only supports f32
155 if (!Subtarget.hasEFPU2())
156 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
157 } else {
158 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
159 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
160 }
161 }
162
163 // Match BITREVERSE to customized fast code sequence in the td file.
166
167 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
169
170 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
171 for (MVT VT : MVT::integer_valuetypes()) {
174 }
175
176 if (Subtarget.isISA3_0()) {
181 } else {
182 // No extending loads from f16 or HW conversions back and forth.
191 }
192
194
195 // PowerPC has pre-increment loads and stores.
206 if (!Subtarget.hasSPE()) {
211 }
212
213 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
214 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
215 for (MVT VT : ScalarIntVTs) {
220 }
221
222 if (Subtarget.useCRBits()) {
224
225 if (isPPC64 || Subtarget.hasFPCVT()) {
228 isPPC64 ? MVT::i64 : MVT::i32);
231 isPPC64 ? MVT::i64 : MVT::i32);
232
235 isPPC64 ? MVT::i64 : MVT::i32);
238 isPPC64 ? MVT::i64 : MVT::i32);
239
242 isPPC64 ? MVT::i64 : MVT::i32);
245 isPPC64 ? MVT::i64 : MVT::i32);
246
249 isPPC64 ? MVT::i64 : MVT::i32);
252 isPPC64 ? MVT::i64 : MVT::i32);
253 } else {
258 }
259
260 // PowerPC does not support direct load/store of condition registers.
263
264 // FIXME: Remove this once the ANDI glue bug is fixed:
265 if (ANDIGlueBug)
267
268 for (MVT VT : MVT::integer_valuetypes()) {
272 }
273
274 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
275 }
276
277 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
278 // PPC (the libcall is not available).
283
284 // We do not currently implement these libm ops for PowerPC.
291
292 // PowerPC has no SREM/UREM instructions unless we are on P9
293 // On P9 we may use a hardware instruction to compute the remainder.
294 // When the result of both the remainder and the division is required it is
295 // more efficient to compute the remainder from the result of the division
296 // rather than use the remainder instruction. The instructions are legalized
297 // directly because the DivRemPairsPass performs the transformation at the IR
298 // level.
299 if (Subtarget.isISA3_0()) {
304 } else {
309 }
310
311 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
320
321 // Handle constrained floating-point operations of scalar.
322 // TODO: Handle SPE specific operation.
329
335 if (Subtarget.hasVSX()) {
338 }
339
340 if (Subtarget.hasFSQRT()) {
343 }
344
345 if (Subtarget.hasFPRND()) {
350
355 }
356
357 // We don't support sin/cos/sqrt/fmod/pow
368 if (Subtarget.hasSPE()) {
371 } else {
374 }
375
376 if (Subtarget.hasSPE())
378
380
381 // If we're enabling GP optimizations, use hardware square root
382 if (!Subtarget.hasFSQRT() &&
383 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
384 Subtarget.hasFRE()))
386
387 if (!Subtarget.hasFSQRT() &&
388 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
389 Subtarget.hasFRES()))
391
392 if (Subtarget.hasFCPSGN()) {
395 } else {
398 }
399
400 if (Subtarget.hasFPRND()) {
405
410 }
411
412 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
413 // to speed up scalar BSWAP64.
414 // CTPOP or CTTZ were introduced in P8/P9 respectively
416 if (Subtarget.hasP9Vector())
418 else
420 if (Subtarget.isISA3_0()) {
423 } else {
426 }
427
428 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
431 } else {
434 }
435
436 // PowerPC does not have ROTR
439
440 if (!Subtarget.useCRBits()) {
441 // PowerPC does not have Select
446 }
447
448 // PowerPC wants to turn select_cc of FP into fsel when possible.
451
452 // PowerPC wants to optimize integer setcc a bit
453 if (!Subtarget.useCRBits())
455
456 if (Subtarget.hasFPU()) {
460
464 }
465
466 // PowerPC does not have BRCOND which requires SetCC
467 if (!Subtarget.useCRBits())
469
471
472 if (Subtarget.hasSPE()) {
473 // SPE has built-in conversions
480 } else {
481 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
484
485 // PowerPC does not have [U|S]INT_TO_FP
490 }
491
492 if (Subtarget.hasDirectMove() && isPPC64) {
497 if (TM.Options.UnsafeFPMath) {
506 }
507 } else {
512 }
513
514 // We cannot sextinreg(i1). Expand to shifts.
516
517 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
518 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
519 // support continuation, user-level threading, etc. As a result, no
520 // other SjLj exception interfaces are implemented and please don't build
521 // your own exception handling based on them.
522 // LLVM/Clang supports zero-cost DWARF exception handling.
525
526 // We want to legalize GlobalAddress and ConstantPool nodes into the
527 // appropriate instructions to materialize the address.
538
539 // TRAP is legal.
541
542 // TRAMPOLINE is custom lowered.
545
546 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
548
549 if (Subtarget.is64BitELFABI()) {
550 // VAARG always uses double-word chunks, so promote anything smaller.
560 } else if (Subtarget.is32BitELFABI()) {
561 // VAARG is custom lowered with the 32-bit SVR4 ABI.
564 } else
566
567 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
568 if (Subtarget.is32BitELFABI())
570 else
572
573 // Use the default implementation.
583
584 // We want to custom lower some of our intrinsics.
586
587 // To handle counter-based loop conditions.
589
594
595 // Comparisons that require checking two conditions.
596 if (Subtarget.hasSPE()) {
601 }
614
617
618 if (Subtarget.has64BitSupport()) {
619 // They also have instructions for converting between i64 and fp.
628 // This is just the low 32 bits of a (signed) fp->i64 conversion.
629 // We cannot do this with Promote because i64 is not a legal type.
632
633 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
636 }
637 } else {
638 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
639 if (Subtarget.hasSPE()) {
642 } else {
645 }
646 }
647
648 // With the instructions enabled under FPCVT, we can do everything.
649 if (Subtarget.hasFPCVT()) {
650 if (Subtarget.has64BitSupport()) {
659 }
660
669 }
670
671 if (Subtarget.use64BitRegs()) {
672 // 64-bit PowerPC implementations can support i64 types directly
673 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
674 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
676 // 64-bit PowerPC wants to expand i128 shifts itself.
680 } else {
681 // 32-bit PowerPC wants to expand i64 shifts itself.
685 }
686
687 // PowerPC has better expansions for funnel shifts than the generic
688 // TargetLowering::expandFunnelShift.
689 if (Subtarget.has64BitSupport()) {
692 }
695
696 if (Subtarget.hasVSX()) {
701 }
702
703 if (Subtarget.hasAltivec()) {
704 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
709 }
710 // First set operation action for all vector types to expand. Then we
711 // will selectively turn on ones that can be effectively codegen'd.
713 // add/sub are legal for all supported vector VT's.
716
717 // For v2i64, these are only valid with P8Vector. This is corrected after
718 // the loop.
719 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
724 }
725 else {
730 }
731
732 if (Subtarget.hasVSX()) {
735 }
736
737 // Vector instructions introduced in P8
738 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
741 }
742 else {
745 }
746
747 // Vector instructions introduced in P9
748 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
750 else
752
753 // We promote all shuffles to v16i8.
756
757 // We promote all non-typed operations to v4i32.
773
774 // No other operations are legal.
812
818 }
819 }
821 if (!Subtarget.hasP8Vector()) {
826 }
827
828 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
829 // with merges, splats, etc.
831
832 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
833 // are cheap, so handle them before they get expanded to scalar.
839
845 Subtarget.useCRBits() ? Legal : Expand);
859
860 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
862 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
863 if (Subtarget.hasAltivec())
864 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
866 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
867 if (Subtarget.hasP8Altivec())
869
870 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
871 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
872 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
873 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
874
877
878 if (Subtarget.hasVSX()) {
881 }
882
883 if (Subtarget.hasP8Altivec())
885 else
887
888 if (Subtarget.isISA3_1()) {
907 }
908
911
914
919
920 // Altivec does not contain unordered floating-point compare instructions
925
926 if (Subtarget.hasVSX()) {
929 if (Subtarget.hasP8Vector()) {
932 }
933 if (Subtarget.hasDirectMove() && isPPC64) {
942 }
944
945 // The nearbyint variants are not allowed to raise the inexact exception
946 // so we can only code-gen them with unsafe math.
947 if (TM.Options.UnsafeFPMath) {
950 }
951
960
966
969
972
973 // Share the Altivec comparison restrictions.
978
981
983
984 if (Subtarget.hasP8Vector())
985 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
986
987 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
988
989 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
990 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
991 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
992
993 if (Subtarget.hasP8Altivec()) {
997
998 // 128 bit shifts can be accomplished via 3 instructions for SHL and
999 // SRL, but not for SRA because of the instructions available:
1000 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1001 // doing
1005
1007 }
1008 else {
1012
1014
1015 // VSX v2i64 only supports non-arithmetic operations.
1018 }
1019
1020 if (Subtarget.isISA3_1())
1022 else
1024
1029
1031
1040
1041 // Custom handling for partial vectors of integers converted to
1042 // floating point. We already have optimal handling for v2i32 through
1043 // the DAG combine, so those aren't necessary.
1060
1067
1068 if (Subtarget.hasDirectMove())
1071
1072 // Handle constrained floating-point operations of vector.
1073 // The predicate is `hasVSX` because Altivec instructions do not raise
1074 // exceptions but VSX vector instructions do.
1088
1102
1103 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1104 }
1105
1106 if (Subtarget.hasP8Altivec()) {
1107 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1108 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1109 }
1110
1111 if (Subtarget.hasP9Vector()) {
1114
1115 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1116 // SRL, but not for SRA because of the instructions available:
1117 // VS{RL} and VS{RL}O.
1121
1122 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1128 // No extending loads to f128 on PPC.
1129 for (MVT FPT : MVT::fp_valuetypes())
1138
1145
1152 // No implementation for these ops for PowerPC.
1158
1159 // Handle constrained floating-point operations of fp128
1180 } else if (Subtarget.hasAltivec() && EnableSoftFP128) {
1181 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1182
1183 for (MVT FPT : MVT::fp_valuetypes())
1185
1188
1191
1192 // Set FADD/FSUB as libcall to prevent the legalizer from expanding the
1193 // fp_to_uint and int_to_fp.
1196
1209
1212
1213 // Expand the fp_extend if the target type is fp128.
1216
1217 // Expand the fp_round if the source type is fp128.
1218 for (MVT VT : {MVT::f32, MVT::f64}) {
1221 }
1222 }
1223
1224 if (Subtarget.hasP9Altivec()) {
1227
1235 }
1236 }
1237
1238 if (Subtarget.pairedVectorMemops()) {
1239 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1242 }
1243 if (Subtarget.hasMMA()) {
1244 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1248 }
1249
1250 if (Subtarget.has64BitSupport())
1252
1253 if (Subtarget.isISA3_1())
1255
1257
1258 if (!isPPC64) {
1261 }
1262
1264
1265 if (Subtarget.hasAltivec()) {
1266 // Altivec instructions set fields to all zeros or all ones.
1268 }
1269
1270 if (!isPPC64) {
1271 // These libcalls are not available in 32-bit.
1272 setLibcallName(RTLIB::SHL_I128, nullptr);
1273 setLibcallName(RTLIB::SRL_I128, nullptr);
1274 setLibcallName(RTLIB::SRA_I128, nullptr);
1275 }
1276
1277 if (!isPPC64)
1279
1280 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1281
1282 // We have target-specific dag combine patterns for the following nodes:
1291 if (Subtarget.hasFPCVT())
1296 if (Subtarget.useCRBits())
1302
1306
1309
1310
1311 if (Subtarget.useCRBits()) {
1315 }
1316
1317 if (Subtarget.hasP9Altivec()) {
1320 }
1321
1322 setLibcallName(RTLIB::LOG_F128, "logf128");
1323 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1324 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1325 setLibcallName(RTLIB::EXP_F128, "expf128");
1326 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1327 setLibcallName(RTLIB::SIN_F128, "sinf128");
1328 setLibcallName(RTLIB::COS_F128, "cosf128");
1329 setLibcallName(RTLIB::POW_F128, "powf128");
1330 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1331 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1332 setLibcallName(RTLIB::REM_F128, "fmodf128");
1333 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1334 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1335 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1336 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1337 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1338 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1339 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1340 setLibcallName(RTLIB::RINT_F128, "rintf128");
1341 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1342 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1343 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1344 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1345
1346 // With 32 condition bits, we don't need to sink (and duplicate) compares
1347 // aggressively in CodeGenPrep.
1348 if (Subtarget.useCRBits()) {
1351 }
1352
1354
1355 switch (Subtarget.getCPUDirective()) {
1356 default: break;
1357 case PPC::DIR_970:
1358 case PPC::DIR_A2:
1359 case PPC::DIR_E500:
1360 case PPC::DIR_E500mc:
1361 case PPC::DIR_E5500:
1362 case PPC::DIR_PWR4:
1363 case PPC::DIR_PWR5:
1364 case PPC::DIR_PWR5X:
1365 case PPC::DIR_PWR6:
1366 case PPC::DIR_PWR6X:
1367 case PPC::DIR_PWR7:
1368 case PPC::DIR_PWR8:
1369 case PPC::DIR_PWR9:
1370 case PPC::DIR_PWR10:
1374 break;
1375 }
1376
1377 if (Subtarget.enableMachineScheduler())
1379 else
1381
1383
1384 // The Freescale cores do better with aggressive inlining of memcpy and
1385 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1386 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1387 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1388 MaxStoresPerMemset = 32;
1390 MaxStoresPerMemcpy = 32;
1394 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1395 // The A2 also benefits from (very) aggressive inlining of memcpy and
1396 // friends. The overhead of the function call, even when warm, can be
1397 // over one hundred cycles.
1398 MaxStoresPerMemset = 128;
1399 MaxStoresPerMemcpy = 128;
1400 MaxStoresPerMemmove = 128;
1401 MaxLoadsPerMemcmp = 128;
1402 } else {
1405 }
1406
1407 IsStrictFPEnabled = true;
1408
1409 // Let the subtarget (CPU) decide if a predictable select is more expensive
1410 // than the corresponding branch. This information is used in CGP to decide
1411 // when to convert selects into branches.
1413}
1414
1415/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1416/// the desired ByVal argument alignment.
1418 if (MaxAlign == MaxMaxAlign)
1419 return;
1420 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1421 if (MaxMaxAlign >= 32 &&
1422 VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1423 MaxAlign = Align(32);
1424 else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1425 MaxAlign < 16)
1426 MaxAlign = Align(16);
1427 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1429 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1430 if (EltAlign > MaxAlign)
1432 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1433 for (auto *EltTy : STy->elements()) {
1436 if (EltAlign > MaxAlign)
1438 if (MaxAlign == MaxMaxAlign)
1439 break;
1440 }
1441 }
1442}
1443
1444/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1445/// function arguments in the caller parameter area.
1447 const DataLayout &DL) const {
1448 // Vectors of 16 bytes or more are passed on a 16-byte boundary.
1449 // Everything else is aligned to 8 bytes on PPC64 and 4 bytes on PPC32.
1450 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1451 if (Subtarget.hasAltivec())
1452 getMaxByValAlign(Ty, Alignment, Align(16));
1453 return Alignment.value();
1454}
1455
1457 return Subtarget.useSoftFloat();
1458}
1459
1461 return Subtarget.hasSPE();
1462}
1463
1465 return VT.isScalarInteger();
1466}
1467
1468const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1469 switch ((PPCISD::NodeType)Opcode) {
1470 case PPCISD::FIRST_NUMBER: break;
1471 case PPCISD::FSEL: return "PPCISD::FSEL";
1472 case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
1473 case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
1474 case PPCISD::FCFID: return "PPCISD::FCFID";
1475 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1476 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1477 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1478 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1479 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1480 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1481 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1483 return "PPCISD::FP_TO_UINT_IN_VSR,";
1485 return "PPCISD::FP_TO_SINT_IN_VSR";
1486 case PPCISD::FRE: return "PPCISD::FRE";
1487 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1488 case PPCISD::FTSQRT:
1489 return "PPCISD::FTSQRT";
1490 case PPCISD::FSQRT:
1491 return "PPCISD::FSQRT";
1492 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1493 case PPCISD::VPERM: return "PPCISD::VPERM";
1494 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1496 return "PPCISD::XXSPLTI_SP_TO_DP";
1498 return "PPCISD::XXSPLTI32DX";
1499 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1500 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1501 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1502 case PPCISD::CMPB: return "PPCISD::CMPB";
1503 case PPCISD::Hi: return "PPCISD::Hi";
1504 case PPCISD::Lo: return "PPCISD::Lo";
1505 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1506 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1507 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1508 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1509 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1510 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1511 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1512 case PPCISD::SRL: return "PPCISD::SRL";
1513 case PPCISD::SRA: return "PPCISD::SRA";
1514 case PPCISD::SHL: return "PPCISD::SHL";
1515 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1516 case PPCISD::CALL: return "PPCISD::CALL";
1517 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1518 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1519 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1520 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1521 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1522 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1523 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1524 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1525 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1526 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1527 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1528 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1529 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1530 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1531 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1533 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1535 return "PPCISD::ANDI_rec_1_EQ_BIT";
1537 return "PPCISD::ANDI_rec_1_GT_BIT";
1538 case PPCISD::VCMP: return "PPCISD::VCMP";
1539 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1540 case PPCISD::LBRX: return "PPCISD::LBRX";
1541 case PPCISD::STBRX: return "PPCISD::STBRX";
1542 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1543 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1544 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1545 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1546 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1547 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1548 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1549 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1550 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1552 return "PPCISD::ST_VSR_SCAL_INT";
1553 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1554 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1555 case PPCISD::BDZ: return "PPCISD::BDZ";
1556 case PPCISD::MFFS: return "PPCISD::MFFS";
1557 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1558 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1559 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1560 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1561 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1562 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1563 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1564 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1565 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1566 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1567 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1568 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1569 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1570 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1571 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1572 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1573 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1574 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1575 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1577 return "PPCISD::PADDI_DTPREL";
1578 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1579 case PPCISD::SC: return "PPCISD::SC";
1580 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1581 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1582 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1583 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1584 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1585 case PPCISD::VABSD: return "PPCISD::VABSD";
1586 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1587 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1588 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1589 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1590 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1591 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1592 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1594 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1596 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1597 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1598 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1599 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1600 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1601 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1602 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1604 return "PPCISD::STRICT_FADDRTZ";
1606 return "PPCISD::STRICT_FCTIDZ";
1608 return "PPCISD::STRICT_FCTIWZ";
1610 return "PPCISD::STRICT_FCTIDUZ";
1612 return "PPCISD::STRICT_FCTIWUZ";
1614 return "PPCISD::STRICT_FCFID";
1616 return "PPCISD::STRICT_FCFIDU";
1618 return "PPCISD::STRICT_FCFIDS";
1620 return "PPCISD::STRICT_FCFIDUS";
1621 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1622 }
1623 return nullptr;
1624}
1625
1627 EVT VT) const {
1628 if (!VT.isVector())
1629 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1630
1632}
1633
1635 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1636 return true;
1637}
1638
1639//===----------------------------------------------------------------------===//
1640// Node matching predicates, for use by the tblgen matching code.
1641//===----------------------------------------------------------------------===//
1642
1643/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1646 return CFP->getValueAPF().isZero();
1647 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1648 // Maybe this has already been legalized into the constant pool?
1649 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1650 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1651 return CFP->getValueAPF().isZero();
1652 }
1653 return false;
1654}
1655
/// isConstantOrUndef - Compare one shuffle-mask element against an expected
/// index. A negative \p Op stands for an undef element and is accepted for
/// any \p Val; a non-negative \p Op is accepted only when it equals \p Val.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef elements are compatible with every expected value.
  if (Op < 0)
    return true;
  return Op == Val;
}
1661
1662/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1663/// VPKUHUM instruction.
1664/// The ShuffleKind distinguishes between big-endian operations with
1665/// two different inputs (0), either-endian operations with two identical
1666/// inputs (1), and little-endian operations with two different inputs (2).
1667/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1669 SelectionDAG &DAG) {
  // The accepted mask depends on byte order, which is read from the DAG's
  // data layout.
1670 bool IsLE = DAG.getDataLayout().isLittleEndian();
  // ShuffleKind 0 (two different inputs) is only valid on big-endian: mask
  // element i must be 2*i+1 or undef, i.e. the indices 1, 3, ..., 31.
1671 if (ShuffleKind == 0) {
1672 if (IsLE)
1673 return false;
1674 for (unsigned i = 0; i != 16; ++i)
1675 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1676 return false;
  // ShuffleKind 2 (two different inputs) is only valid on little-endian: mask
  // element i must be 2*i or undef, i.e. the indices 0, 2, ..., 30.
1677 } else if (ShuffleKind == 2) {
1678 if (!IsLE)
1679 return false;
1680 for (unsigned i = 0; i != 16; ++i)
1681 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1682 return false;
  // ShuffleKind 1 (two identical inputs) is valid for either byte order. For
  // i in 0..7, elements i and i+8 must both be 2*i+j (or undef), so the two
  // halves of the mask repeat the same eight indices; j is 0 on little-endian
  // and 1 on big-endian.
1683 } else if (ShuffleKind == 1) {
1684 unsigned j = IsLE ? 0 : 1;
1685 for (unsigned i = 0; i != 8; ++i)
1686 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1687 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1688 return false;
1689 }
  // Every checked element matched. Note that a ShuffleKind other than 0, 1 or
  // 2 performs no checks at all and also ends up here.
1690 return true;
1691}
1692
1693/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1694/// VPKUWUM instruction.
1695/// The ShuffleKind distinguishes between big-endian operations with
1696/// two different inputs (0), either-endian operations with two identical
1697/// inputs (1), and little-endian operations with two different inputs (2).
1698/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1700 SelectionDAG &DAG) {
  // The accepted mask depends on byte order, which is read from the DAG's
  // data layout.
1701 bool IsLE = DAG.getDataLayout().isLittleEndian();
  // ShuffleKind 0 (two different inputs) is only valid on big-endian. The mask
  // is checked two elements at a time: for even i, elements (i, i+1) must be
  // (2*i+2, 2*i+3) or undef.
1702 if (ShuffleKind == 0) {
1703 if (IsLE)
1704 return false;
1705 for (unsigned i = 0; i != 16; i += 2)
1706 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1707 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1708 return false;
  // ShuffleKind 2 (two different inputs) is only valid on little-endian: for
  // even i, elements (i, i+1) must be (2*i, 2*i+1) or undef.
1709 } else if (ShuffleKind == 2) {
1710 if (!IsLE)
1711 return false;
1712 for (unsigned i = 0; i != 16; i += 2)
1713 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1714 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1715 return false;
  // ShuffleKind 1 (two identical inputs) is valid for either byte order. For
  // i in {0, 2, 4, 6}, elements (i, i+1) and (i+8, i+9) must both be
  // (2*i+j, 2*i+j+1) or undef, so the upper half of the mask repeats the
  // lower half; j is 0 on little-endian and 2 on big-endian.
1716 } else if (ShuffleKind == 1) {
1717 unsigned j = IsLE ? 0 : 2;
1718 for (unsigned i = 0; i != 8; i += 2)
1719 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1720 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1721 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1722 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1723 return false;
1724 }
  // Every checked element matched. Note that a ShuffleKind other than 0, 1 or
  // 2 performs no checks at all and also ends up here.
1725 return true;
1726}
1727
1728/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1729/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1730/// current subtarget.
1731///
1732/// The ShuffleKind distinguishes between big-endian operations with
1733/// two different inputs (0), either-endian operations with two identical
1734/// inputs (1), and little-endian operations with two different inputs (2).
1735/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1737 SelectionDAG &DAG) {
1738 const PPCSubtarget& Subtarget =
1739 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1740 if (!Subtarget.hasP8Vector())
1741 return false;
1742
1743 bool IsLE = DAG.getDataLayout().isLittleEndian();
1744 if (ShuffleKind == 0) {
1745 if (IsLE)
1746 return false;
1747 for (unsigned i = 0; i != 16; i += 4)
1748 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1749 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1750 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1751 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1752 return false;
1753 } else if (ShuffleKind == 2) {
1754 if (!IsLE)
1755 return false;
1756 for (unsigned i = 0; i != 16; i += 4)
1757 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1758 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1759 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1760 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1761 return false;
1762 } else if (ShuffleKind == 1) {
1763 unsigned j = IsLE ? 0 : 4;
1764 for (unsigned i = 0; i != 8; i += 4)
1765 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1766 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1767 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1768 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1769 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1770 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1771 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1772 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1773 return false;
1774 }
1775 return true;
1776}
1777
1778/// isVMerge - Common function, used to match vmrg* shuffles.
1779///
1780static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1781 unsigned LHSStart, unsigned RHSStart) {
1782 if (N->getValueType(0) != MVT::v16i8)
1783 return false;
1784 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1785 "Unsupported merge size!");
1786
1787 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1788 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1789 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1790 LHSStart+j+i*UnitSize) ||
1791 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1792 RHSStart+j+i*UnitSize))
1793 return false;
1794 }
1795 return true;
1796}
1797
1798/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1799/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1800/// The ShuffleKind distinguishes between big-endian merges with two
1801/// different inputs (0), either-endian merges with two identical inputs (1),
1802/// and little-endian merges with two different inputs (2). For the latter,
1803/// the input operands are swapped (see PPCInstrAltivec.td).
1805 unsigned ShuffleKind, SelectionDAG &DAG) {
1806 if (DAG.getDataLayout().isLittleEndian()) {
1807 if (ShuffleKind == 1) // unary
1808 return isVMerge(N, UnitSize, 0, 0);
1809 else if (ShuffleKind == 2) // swapped
1810 return isVMerge(N, UnitSize, 0, 16);
1811 else
1812 return false;
1813 } else {
1814 if (ShuffleKind == 1) // unary
1815 return isVMerge(N, UnitSize, 8, 8);
1816 else if (ShuffleKind == 0) // normal
1817 return isVMerge(N, UnitSize, 8, 24);
1818 else
1819 return false;
1820 }
1821}
1822
1823/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1824/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1825/// The ShuffleKind distinguishes between big-endian merges with two
1826/// different inputs (0), either-endian merges with two identical inputs (1),
1827/// and little-endian merges with two different inputs (2). For the latter,
1828/// the input operands are swapped (see PPCInstrAltivec.td).
1830 unsigned ShuffleKind, SelectionDAG &DAG) {
1831 if (DAG.getDataLayout().isLittleEndian()) {
1832 if (ShuffleKind == 1) // unary
1833 return isVMerge(N, UnitSize, 8, 8);
1834 else if (ShuffleKind == 2) // swapped
1835 return isVMerge(N, UnitSize, 8, 24);
1836 else
1837 return false;
1838 } else {
1839 if (ShuffleKind == 1) // unary
1840 return isVMerge(N, UnitSize, 0, 0);
1841 else if (ShuffleKind == 0) // normal
1842 return isVMerge(N, UnitSize, 0, 16);
1843 else
1844 return false;
1845 }
1846}
1847
1848/**
1849 * Common function used to match vmrgew and vmrgow shuffles
1850 *
1851 * The indexOffset determines whether to look for even or odd words in
1852 * the shuffle mask. This is based on the of the endianness of the target
1853 * machine.
1854 * - Little Endian:
1855 * - Use offset of 0 to check for odd elements
1856 * - Use offset of 4 to check for even elements
1857 * - Big Endian:
1858 * - Use offset of 0 to check for even elements
1859 * - Use offset of 4 to check for odd elements
1860 * A detailed description of the vector element ordering for little endian and
1861 * big endian can be found at
1862 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1863 * Targeting your applications - what little endian and big endian IBM XL C/C++
1864 * compiler differences mean to you
1865 *
1866 * The mask to the shuffle vector instruction specifies the indices of the
1867 * elements from the two input vectors to place in the result. The elements are
1868 * numbered in array-access order, starting with the first vector. These vectors
1869 * are always of type v16i8, thus each vector will contain 16 elements of size
1870 * 8. More info on the shuffle vector can be found in the
1871 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1872 * Language Reference.
1873 *
1874 * The RHSStartValue indicates whether the same input vectors are used (unary)
1875 * or two different input vectors are used, based on the following:
1876 * - If the instruction uses the same vector for both inputs, the range of the
1877 * indices will be 0 to 15. In this case, the RHSStart value passed should
1878 * be 0.
1879 * - If the instruction has two different vectors then the range of the
1880 * indices will be 0 to 31. In this case, the RHSStart value passed should
1881 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1882 * to 31 specify elements in the second vector).
1883 *
1884 * \param[in] N The shuffle vector SD Node to analyze
1885 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1886 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1887 * vector to the shuffle_vector instruction
1888 * \return true iff this shuffle vector represents an even or odd word merge
1889 */
1891 unsigned RHSStartValue) {
1892 if (N->getValueType(0) != MVT::v16i8)
1893 return false;
1894
1895 for (unsigned i = 0; i < 2; ++i)
1896 for (unsigned j = 0; j < 4; ++j)
1897 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1899 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1901 return false;
1902 return true;
1903}
1904
1905/**
1906 * Determine if the specified shuffle mask is suitable for the vmrgew or
1907 * vmrgow instructions.
1908 *
1909 * \param[in] N The shuffle vector SD Node to analyze
1910 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1911 * \param[in] ShuffleKind Identify the type of merge:
1912 * - 0 = big-endian merge with two different inputs;
1913 * - 1 = either-endian merge with two identical inputs;
1914 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1915 * little-endian merges).
1916 * \param[in] DAG The current SelectionDAG
1917 * \return true iff this shuffle mask
1918 */
1920 unsigned ShuffleKind, SelectionDAG &DAG) {
1921 if (DAG.getDataLayout().isLittleEndian()) {
1922 unsigned indexOffset = CheckEven ? 4 : 0;
1923 if (ShuffleKind == 1) // Unary
1924 return isVMerge(N, indexOffset, 0);
1925 else if (ShuffleKind == 2) // swapped
1926 return isVMerge(N, indexOffset, 16);
1927 else
1928 return false;
1929 }
1930 else {
1931 unsigned indexOffset = CheckEven ? 0 : 4;
1932 if (ShuffleKind == 1) // Unary
1933 return isVMerge(N, indexOffset, 0);
1934 else if (ShuffleKind == 0) // Normal
1935 return isVMerge(N, indexOffset, 16);
1936 else
1937 return false;
1938 }
1939 return false;
1940}
1941
1942/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1943/// amount, otherwise return -1.
1944/// The ShuffleKind distinguishes between big-endian operations with two
1945/// different inputs (0), either-endian operations with two identical inputs
1946/// (1), and little-endian operations with two different inputs (2). For the
1947/// latter, the input operands are swapped (see PPCInstrAltivec.td).
1948int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1949 SelectionDAG &DAG) {
1950 if (N->getValueType(0) != MVT::v16i8)
1951 return -1;
1952
1954
1955 // Find the first non-undef value in the shuffle mask.
1956 unsigned i;
1957 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1958 /*search*/;
1959
1960 if (i == 16) return -1; // all undef.
1961
1962 // Otherwise, check to see if the rest of the elements are consecutively
1963 // numbered from this value.
1964 unsigned ShiftAmt = SVOp->getMaskElt(i);
1965 if (ShiftAmt < i) return -1;
1966
1967 ShiftAmt -= i;
1968 bool isLE = DAG.getDataLayout().isLittleEndian();
1969
1970 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1971 // Check the rest of the elements to see if they are consecutive.
1972 for (++i; i != 16; ++i)
1973 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1974 return -1;
1975 } else if (ShuffleKind == 1) {
1976 // Check the rest of the elements to see if they are consecutive.
1977 for (++i; i != 16; ++i)
1978 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1979 return -1;
1980 } else
1981 return -1;
1982
1983 if (isLE)
1984 ShiftAmt = 16 - ShiftAmt;
1985
1986 return ShiftAmt;
1987}
1988
1989/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1990/// specifies a splat of a single element that is suitable for input to
1991/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
1993 assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
1994 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
1995
1996 // The consecutive indices need to specify an element, not part of two
1997 // different elements. So abandon ship early if this isn't the case.
1998 if (N->getMaskElt(0) % EltSize != 0)
1999 return false;
2000
2001 // This is a splat operation if each element of the permute is the same, and
2002 // if the value doesn't reference the second vector.
2003 unsigned ElementBase = N->getMaskElt(0);
2004
2005 // FIXME: Handle UNDEF elements too!
2006 if (ElementBase >= 16)
2007 return false;
2008
2009 // Check that the indices are consecutive, in the case of a multi-byte element
2010 // splatted with a v16i8 mask.
2011 for (unsigned i = 1; i != EltSize; ++i)
2012 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2013 return false;
2014
2015 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2016 if (N->getMaskElt(i) < 0) continue;
2017 for (unsigned j = 0; j != EltSize; ++j)
2018 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2019 return false;
2020 }
2021 return true;
2022}
2023
2024/// Check that the mask is shuffling N byte elements. Within each N byte
2025/// element of the mask, the indices could be either in increasing or
2026/// decreasing order as long as they are consecutive.
2027/// \param[in] N the shuffle vector SD Node to analyze
2028/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2029/// Word/DoubleWord/QuadWord).
2030/// \param[in] StepLen the delta indices number among the N byte element, if
2031/// the mask is in increasing/decreasing order then it is 1/-1.
2032/// \return true iff the mask is shuffling N byte elements.
2033static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2034 int StepLen) {
2035 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2036 "Unexpected element width.");
2037 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2038
2039 unsigned NumOfElem = 16 / Width;
2040 unsigned MaskVal[16]; // Width is never greater than 16
2041 for (unsigned i = 0; i < NumOfElem; ++i) {
2042 MaskVal[0] = N->getMaskElt(i * Width);
2043 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2044 return false;
2045 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2046 return false;
2047 }
2048
2049 for (unsigned int j = 1; j < Width; ++j) {
2050 MaskVal[j] = N->getMaskElt(i * Width + j);
2051 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2052 return false;
2053 }
2054 }
2055 }
2056
2057 return true;
2058}
2059
2061 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2062 if (!isNByteElemShuffleMask(N, 4, 1))
2063 return false;
2064
2065 // Now we look at mask elements 0,4,8,12
2066 unsigned M0 = N->getMaskElt(0) / 4;
2067 unsigned M1 = N->getMaskElt(4) / 4;
2068 unsigned M2 = N->getMaskElt(8) / 4;
2069 unsigned M3 = N->getMaskElt(12) / 4;
2070 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2071 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2072
2073 // Below, let H and L be arbitrary elements of the shuffle mask
2074 // where H is in the range [4,7] and L is in the range [0,3].
2075 // H, 1, 2, 3 or L, 5, 6, 7
2076 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2077 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2079 InsertAtByte = IsLE ? 12 : 0;
2080 Swap = M0 < 4;
2081 return true;
2082 }
2083 // 0, H, 2, 3 or 4, L, 6, 7
2084 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2085 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2087 InsertAtByte = IsLE ? 8 : 4;
2088 Swap = M1 < 4;
2089 return true;
2090 }
2091 // 0, 1, H, 3 or 4, 5, L, 7
2092 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2093 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2095 InsertAtByte = IsLE ? 4 : 8;
2096 Swap = M2 < 4;
2097 return true;
2098 }
2099 // 0, 1, 2, H or 4, 5, 6, L
2100 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2101 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2103 InsertAtByte = IsLE ? 0 : 12;
2104 Swap = M3 < 4;
2105 return true;
2106 }
2107
2108 // If both vector operands for the shuffle are the same vector, the mask will
2109 // contain only elements from the first one and the second one will be undef.
2110 if (N->getOperand(1).isUndef()) {
2111 ShiftElts = 0;
2112 Swap = true;
2113 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2114 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2115 InsertAtByte = IsLE ? 12 : 0;
2116 return true;
2117 }
2118 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2119 InsertAtByte = IsLE ? 8 : 4;
2120 return true;
2121 }
2122 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2123 InsertAtByte = IsLE ? 4 : 8;
2124 return true;
2125 }
2126 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2127 InsertAtByte = IsLE ? 0 : 12;
2128 return true;
2129 }
2130 }
2131
2132 return false;
2133}
2134
2136 bool &Swap, bool IsLE) {
2137 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2138 // Ensure each byte index of the word is consecutive.
2139 if (!isNByteElemShuffleMask(N, 4, 1))
2140 return false;
2141
2142 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2143 unsigned M0 = N->getMaskElt(0) / 4;
2144 unsigned M1 = N->getMaskElt(4) / 4;
2145 unsigned M2 = N->getMaskElt(8) / 4;
2146 unsigned M3 = N->getMaskElt(12) / 4;
2147
2148 // If both vector operands for the shuffle are the same vector, the mask will
2149 // contain only elements from the first one and the second one will be undef.
2150 if (N->getOperand(1).isUndef()) {
2151 assert(M0 < 4 && "Indexing into an undef vector?");
2152 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2153 return false;
2154
2155 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2156 Swap = false;
2157 return true;
2158 }
2159
2160 // Ensure each word index of the ShuffleVector Mask is consecutive.
2161 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2162 return false;
2163
2164 if (IsLE) {
2165 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2166 // Input vectors don't need to be swapped if the leading element
2167 // of the result is one of the 3 left elements of the second vector
2168 // (or if there is no shift to be done at all).
2169 Swap = false;
2170 ShiftElts = (8 - M0) % 8;
2171 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2172 // Input vectors need to be swapped if the leading element
2173 // of the result is one of the 3 left elements of the first vector
2174 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2175 Swap = true;
2176 ShiftElts = (4 - M0) % 4;
2177 }
2178
2179 return true;
2180 } else { // BE
2181 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2182 // Input vectors don't need to be swapped if the leading element
2183 // of the result is one of the 4 elements of the first vector.
2184 Swap = false;
2185 ShiftElts = M0;
2186 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2187 // Input vectors need to be swapped if the leading element
2188 // of the result is one of the 4 elements of the right vector.
2189 Swap = true;
2190 ShiftElts = M0 - 4;
2191 }
2192
2193 return true;
2194 }
2195}
2196
2198 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2199
2200 if (!isNByteElemShuffleMask(N, Width, -1))
2201 return false;
2202
2203 for (int i = 0; i < 16; i += Width)
2204 if (N->getMaskElt(i) != i + Width - 1)
2205 return false;
2206
2207 return true;
2208}
2209
2213
2217
2221
2225
2226/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2227/// if the inputs to the instruction should be swapped and set \p DM to the
2228/// value for the immediate.
2229/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2230/// AND element 0 of the result comes from the first input (LE) or second input
2231/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2232/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2233/// mask.
2235 bool &Swap, bool IsLE) {
2236 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2237
2238 // Ensure each byte index of the double word is consecutive.
2239 if (!isNByteElemShuffleMask(N, 8, 1))
2240 return false;
2241
2242 unsigned M0 = N->getMaskElt(0) / 8;
2243 unsigned M1 = N->getMaskElt(8) / 8;
2244 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2245
2246 // If both vector operands for the shuffle are the same vector, the mask will
2247 // contain only elements from the first one and the second one will be undef.
2248 if (N->getOperand(1).isUndef()) {
2249 if ((M0 | M1) < 2) {
2250 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2251 Swap = false;
2252 return true;
2253 } else
2254 return false;
2255 }
2256
2257 if (IsLE) {
2258 if (M0 > 1 && M1 < 2) {
2259 Swap = false;
2260 } else if (M0 < 2 && M1 > 1) {
2261 M0 = (M0 + 2) % 4;
2262 M1 = (M1 + 2) % 4;
2263 Swap = true;
2264 } else
2265 return false;
2266
2267 // Note: if control flow comes here that means Swap is already set above
2268 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2269 return true;
2270 } else { // BE
2271 if (M0 < 2 && M1 > 1) {
2272 Swap = false;
2273 } else if (M0 > 1 && M1 < 2) {
2274 M0 = (M0 + 2) % 4;
2275 M1 = (M1 + 2) % 4;
2276 Swap = true;
2277 } else
2278 return false;
2279
2280 // Note: if control flow comes here that means Swap is already set above
2281 DM = (M0 << 1) + (M1 & 1);
2282 return true;
2283 }
2284}
2285
2286
2287/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2288/// appropriate for PPC mnemonics (which have a big endian bias - namely
2289/// elements are counted from the left of the vector register).
2290unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2291 SelectionDAG &DAG) {
2293 assert(isSplatShuffleMask(SVOp, EltSize));
2294 if (DAG.getDataLayout().isLittleEndian())
2295 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2296 else
2297 return SVOp->getMaskElt(0) / EltSize;
2298}
2299
2300/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2301/// by using a vspltis[bhw] instruction of the specified element size, return
2302/// the constant being splatted. The ByteSize field indicates the number of
2303/// bytes of each element [124] -> [bhw].
2305 SDValue OpVal(nullptr, 0);
2306
2307 // If ByteSize of the splat is bigger than the element size of the
2308 // build_vector, then we have a case where we are checking for a splat where
2309 // multiple elements of the buildvector are folded together into a single
2310 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2311 unsigned EltSize = 16/N->getNumOperands();
2312 if (EltSize < ByteSize) {
2313 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2315 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2316
2317 // See if all of the elements in the buildvector agree across.
2318 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2319 if (N->getOperand(i).isUndef()) continue;
2320 // If the element isn't a constant, bail fully out.
2321 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2322
2323 if (!UniquedVals[i&(Multiple-1)].getNode())
2324 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2325 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2326 return SDValue(); // no match.
2327 }
2328
2329 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2330 // either constant or undef values that are identical for each chunk. See
2331 // if these chunks can form into a larger vspltis*.
2332
2333 // Check to see if all of the leading entries are either 0 or -1. If
2334 // neither, then this won't fit into the immediate field.
2335 bool LeadingZero = true;
2336 bool LeadingOnes = true;
2337 for (unsigned i = 0; i != Multiple-1; ++i) {
2338 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2339
2342 }
2343 // Finally, check the least significant entry.
2344 if (LeadingZero) {
2345 if (!UniquedVals[Multiple-1].getNode())
2346 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2347 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2348 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2349 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2350 }
2351 if (LeadingOnes) {
2352 if (!UniquedVals[Multiple-1].getNode())
2353 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2354 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2355 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2356 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2357 }
2358
2359 return SDValue();
2360 }
2361
2362 // Check to see if this buildvec has a single non-undef value in its elements.
2363 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2364 if (N->getOperand(i).isUndef()) continue;
2365 if (!OpVal.getNode())
2366 OpVal = N->getOperand(i);
2367 else if (OpVal != N->getOperand(i))
2368 return SDValue();
2369 }
2370
2371 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2372
2373 unsigned ValSizeInBytes = EltSize;
2374 uint64_t Value = 0;
2376 Value = CN->getZExtValue();
2378 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2379 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2380 }
2381
2382 // If the splat value is larger than the element value, then we can never do
2383 // this splat. The only case that we could fit the replicated bits into our
2384 // immediate field for would be zero, and we prefer to use vxor for it.
2385 if (ValSizeInBytes < ByteSize) return SDValue();
2386
2387 // If the element value is larger than the splat value, check if it consists
2388 // of a repeated bit pattern of size ByteSize.
2389 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2390 return SDValue();
2391
2392 // Properly sign extend the value.
2393 int MaskVal = SignExtend32(Value, ByteSize * 8);
2394
2395 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2396 if (MaskVal == 0) return SDValue();
2397
2398 // Finally, if this value fits in a 5 bit sext field, return it
2400 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2401 return SDValue();
2402}
2403
2404//===----------------------------------------------------------------------===//
2405// Addressing Mode Selection
2406//===----------------------------------------------------------------------===//
2407
2408/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2409/// or 64-bit immediate, and if the value can be accurately represented as a
2410/// sign extension from a 16-bit value. If so, this returns true and the
2411/// immediate.
2412bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2413 if (!isa<ConstantSDNode>(N))
2414 return false;
2415
2416 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2417 if (N->getValueType(0) == MVT::i32)
2418 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2419 else
2420 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2421}
2422bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2423 return isIntS16Immediate(Op.getNode(), Imm);
2424}
2425
2426
2427/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2428/// be represented as an indexed [r+r] operation.
2430 SDValue &Index,
2431 SelectionDAG &DAG) const {
2432 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2433 UI != E; ++UI) {
2434 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2435 if (Memop->getMemoryVT() == MVT::f64) {
2436 Base = N.getOperand(0);
2437 Index = N.getOperand(1);
2438 return true;
2439 }
2440 }
2441 }
2442 return false;
2443}
2444
2445/// isIntS34Immediate - This method tests if value of node given can be
2446/// accurately represented as a sign extension from a 34-bit value. If so,
2447/// this returns true and the immediate.
2448bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2449 if (!isa<ConstantSDNode>(N))
2450 return false;
2451
2452 Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2453 return isInt<34>(Imm);
2454}
2455bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2456 return isIntS34Immediate(Op.getNode(), Imm);
2457}
2458
2459/// SelectAddressRegReg - Given the specified addressed, check to see if it
2460/// can be represented as an indexed [r+r] operation. Returns false if it
2461/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2462/// non-zero and N can be represented by a base register plus a signed 16-bit
2463/// displacement, make a more precise judgement by checking (displacement % \p
2464/// EncodingAlignment).
2466 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2468 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2469 // a [pc+imm].
2470 if (SelectAddressPCRel(N, Base))
2471 return false;
2472
2473 int16_t Imm = 0;
2474 if (N.getOpcode() == ISD::ADD) {
2475 // Is there any SPE load/store (f64), which can't handle 16bit offset?
2476 // SPE load/store can only handle 8-bit offsets.
2477 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2478 return true;
2479 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2481 return false; // r+i
2482 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2483 return false; // r+i
2484
2485 Base = N.getOperand(0);
2486 Index = N.getOperand(1);
2487 return true;
2488 } else if (N.getOpcode() == ISD::OR) {
2489 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2491 return false; // r+i can fold it if we can.
2492
2493 // If this is an or of disjoint bitfields, we can codegen this as an add
2494 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2495 // disjoint.
2496 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2497
2498 if (LHSKnown.Zero.getBoolValue()) {
2499 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2500 // If all of the bits are known zero on the LHS or RHS, the add won't
2501 // carry.
2502 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2503 Base = N.getOperand(0);
2504 Index = N.getOperand(1);
2505 return true;
2506 }
2507 }
2508 }
2509
2510 return false;
2511}
2512
2513// If we happen to be doing an i64 load or store into a stack slot that has
2514// less than a 4-byte alignment, then the frame-index elimination may need to
2515// use an indexed load or store instruction (because the offset may not be a
2516// multiple of 4). The extra register needed to hold the offset comes from the
2517// register scavenger, and it is possible that the scavenger will need to use
2518// an emergency spill slot. As a result, we need to make sure that a spill slot
2519// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2520// stack slot.
2521static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2522 // FIXME: This does not handle the LWA case.
2523 if (VT != MVT::i64)
2524 return;
2525
2526 // NOTE: We'll exclude negative FIs here, which come from argument
2527 // lowering, because there are no known test cases triggering this problem
2528 // using packed structures (or similar). We can remove this exclusion if
2529 // we find such a test case. The reason why this is so test-case driven is
2530 // because this entire 'fixup' is only to prevent crashes (from the
2531 // register scavenger) on not-really-valid inputs. For example, if we have:
2532 // %a = alloca i1
2533 // %b = bitcast i1* %a to i64*
2534 // store i64* a, i64 b
2535 // then the store should really be marked as 'align 1', but is not. If it
2536 // were marked as 'align 1' then the indexed form would have been
2537 // instruction-selected initially, and the problem this 'fixup' is preventing
2538 // won't happen regardless.
2539 if (FrameIdx < 0)
2540 return;
2541
2543 MachineFrameInfo &MFI = MF.getFrameInfo();
2544
2545 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2546 return;
2547
2548 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2549 FuncInfo->setHasNonRISpills();
2550}
2551
2552/// Returns true if the address N can be represented by a base register plus
2553/// a signed 16-bit displacement [r+imm], and if it is not better
2554/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2555/// displacements that are multiples of that value.
///
/// On success \p Disp holds the displacement operand and \p Base the base
/// operand. The address shapes handled below are, in order: an ADD with a
/// signed 16-bit immediate, an ADD whose second operand is a PPCISD::Lo node,
/// an OR with a signed 16-bit immediate whose bits are known zero in the other
/// operand, a constant address, and finally the catch-all [r+0]. The only
/// `return false` paths visible here are the PC-relative and reg+reg checks.
2557 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2559 // FIXME dl should come from parent load or store, not from address
2560 SDLoc dl(N);
2561
2562 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2563 // a [pc+imm].
2564 if (SelectAddressPCRel(N, Base))
2565 return false;
2566
2567 // If this can be more profitably realized as r+r, fail.
2568 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2569 return false;
2570
2571 if (N.getOpcode() == ISD::ADD) {
2572 int16_t imm = 0;
2573 if (isIntS16Immediate(N.getOperand(1), imm) &&
2575 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
 // A frame-index base is rewritten to a target frame index and reported to
 // fixupFuncForFI; any other base operand is used unchanged.
2576 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2577 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2578 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2579 } else {
2580 Base = N.getOperand(0);
2581 }
2582 return true; // [r+i]
2583 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2584 // Match LOAD (ADD (X, Lo(G))).
2585 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2586 && "Cannot handle constant offsets yet!");
 // Here the displacement is the Lo node's first operand, not a constant.
2587 Disp = N.getOperand(1).getOperand(0); // The global address.
2592 Base = N.getOperand(0);
2593 return true; // [&g+r]
2594 }
2595 } else if (N.getOpcode() == ISD::OR) {
2596 int16_t imm = 0;
2597 if (isIntS16Immediate(N.getOperand(1), imm) &&
2599 // If this is an or of disjoint bitfields, we can codegen this as an add
2600 // (for better address arithmetic) if the LHS and RHS of the OR are
2601 // provably disjoint.
2602 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2603
 // Every bit set in imm must be a known-zero bit of the LHS: the union of
 // the LHS known-zero mask and the complement of imm is then all ones.
2604 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2605 // If all of the bits are known zero on the LHS or RHS, the add won't
2606 // carry.
2607 if (FrameIndexSDNode *FI =
2608 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2609 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2610 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2611 } else {
2612 Base = N.getOperand(0);
2613 }
2614 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2615 return true;
2616 }
2617 }
2619 // Loading from a constant address.
2620
2621 // If this address fits entirely in a 16-bit sext immediate field, codegen
2622 // this as "d, 0"
2623 int16_t Imm;
2624 if (isIntS16Immediate(CN, Imm) &&
2626 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
 // The base is the zero register matching the subtarget's pointer width.
2627 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2628 CN->getValueType(0));
2629 return true;
2630 }
2631
2632 // Handle 32-bit sext immediates with LIS + addr mode.
2633 if ((CN->getValueType(0) == MVT::i32 ||
2634 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2636 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2637 int Addr = (int)CN->getZExtValue();
2638
2639 // Otherwise, break this down into an LIS + disp.
2640 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2641
 // Subtracting the sign-extended low half before shifting makes the high
 // half absorb the borrow when the low half is negative, so that
 // (high << 16) + sext(low) reproduces Addr.
2642 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2643 MVT::i32);
2644 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2645 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2646 return true;
2647 }
2648 }
2649
 // No special form matched: use a zero displacement and take the node itself
 // (or its target frame index) as the base.
2650 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2652 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2653 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2654 } else
2655 Base = N;
2656 return true; // [r+0]
2657}
2658
2659/// Similar to the 16-bit case but for instructions that take a 34-bit
2660/// displacement field (prefixed loads/stores).
///
/// Only i64 addresses are accepted. On success \p Disp is a target constant
/// holding the immediate and \p Base is the other ADD/OR operand (rewritten
/// to a target frame index when it is a FrameIndexSDNode), or the PPC::ZERO8
/// register when the whole address is a 34-bit constant. Every other address
/// shape returns false.
2662 SDValue &Base,
2663 SelectionDAG &DAG) const {
2664 // Only on 64-bit targets.
2665 if (N.getValueType() != MVT::i64)
2666 return false;
2667
2668 SDLoc dl(N);
2669 int64_t Imm = 0;
2670
 // (add x, imm34): x is the base, imm34 the displacement.
2671 if (N.getOpcode() == ISD::ADD) {
2672 if (!isIntS34Immediate(N.getOperand(1), Imm))
2673 return false;
2674 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2675 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2676 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2677 else
2678 Base = N.getOperand(0);
2679 return true;
2680 }
2681
2682 if (N.getOpcode() == ISD::OR) {
2683 if (!isIntS34Immediate(N.getOperand(1), Imm))
2684 return false;
2685 // If this is an or of disjoint bitfields, we can codegen this as an add
2686 // (for better address arithmetic) if the LHS and RHS of the OR are
2687 // provably disjoint.
2688 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
 // Reject unless every bit set in Imm is a known-zero bit of the LHS.
2689 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2690 return false;
2691 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2692 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2693 else
2694 Base = N.getOperand(0);
2695 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2696 return true;
2697 }
2698
2699 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2700 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2701 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2702 return true;
2703 }
2704
2705 return false;
2706}
2707
2708/// SelectAddressRegRegOnly - Given the specified address, force it to be
2709/// represented as an indexed [r+r] operation.
///
/// Every path through this function returns true; \p Base and \p Index are
/// always assigned. Three strategies are tried in order: SelectAddressRegReg,
/// splitting an ADD into its two operands, and finally pairing the zero
/// register with the whole address.
2711 SDValue &Index,
2712 SelectionDAG &DAG) const {
2713 // Check to see if we can easily represent this as an [r+r] address. This
2714 // will fail if it thinks that the address is more profitably represented as
2715 // reg+imm, e.g. where imm = 0.
2716 if (SelectAddressRegReg(N, Base, Index, DAG))
2717 return true;
2718
2719 // If the address is the result of an add, we will utilize the fact that the
2720 // address calculation includes an implicit add. However, we can reduce
2721 // register pressure if we do not materialize a constant just for use as the
2722 // index register. We only get rid of the add if it is not an add of a
2723 // value and a 16-bit signed constant and both have a single use.
2724 int16_t imm = 0;
 // The ADD is split unless operand 1 is a signed 16-bit immediate AND both
 // operands have exactly one use.
2725 if (N.getOpcode() == ISD::ADD &&
2726 (!isIntS16Immediate(N.getOperand(1), imm) ||
2727 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2728 Base = N.getOperand(0);
2729 Index = N.getOperand(1);
2730 return true;
2731 }
2732
2733 // Otherwise, do it the hard way, using R0 as the base register.
 // The register used is PPC::ZERO8 on PPC64 subtargets and PPC::ZERO otherwise.
2734 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2735 N.getValueType());
2736 Index = N;
2737 return true;
2738}
2739
2740template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2741 Ty *PCRelCand = dyn_cast<Ty>(N);
2742 return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2743}
2744
2745/// Returns true if this address is a PC Relative address.
2746/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2747/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
///
/// Note that \p Base is overwritten with \p N before any check is made, so it
/// is modified even on the path that returns false.
2749 // This is a materialize PC Relative node. Always select this as PC Relative.
2750 Base = N;
2751 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2752 return true;
 // NOTE(review): listing lines 2753-2756, which hold the condition guarding
 // the next return, are absent from this excerpt.
2757 return true;
2758 return false;
2759}
2760
2761/// Returns true if we should use a direct load into vector instruction
2762/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
///
/// \param N  Candidate node; false is returned when `LD` is null.
/// \param ST Subtarget queried for the P8/P9 vector features.
///
/// The memory type must be a simple i64, i32 (needs hasP8Vector), or i16/i8
/// (needs hasP9Vector), and result 0 of the node must have exactly one use,
/// which must be ISD::SCALAR_TO_VECTOR or PPCISD::SCALAR_TO_VECTOR_PERMUTED.
2763static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2764
2765 // If there are any other uses other than scalar to vector, then we should
2766 // keep it as a scalar load -> direct move pattern to prevent multiple
2767 // loads.
 // NOTE(review): listing line 2768, which defines LD, is absent from this
 // excerpt.
2769 if (!LD)
2770 return false;
2771
2772 EVT MemVT = LD->getMemoryVT();
2773 if (!MemVT.isSimple())
2774 return false;
 // Gate each memory width on the subtarget feature it requires.
2775 switch(MemVT.getSimpleVT().SimpleTy) {
2776 case MVT::i64:
2777 break;
2778 case MVT::i32:
2779 if (!ST.hasP8Vector())
2780 return false;
2781 break;
2782 case MVT::i16:
2783 case MVT::i8:
2784 if (!ST.hasP9Vector())
2785 return false;
2786 break;
2787 default:
2788 return false;
2789 }
2790
 // Result 0 is the loaded value; it must feed exactly one user.
2791 SDValue LoadedVal(N, 0);
2792 if (!LoadedVal.hasOneUse())
2793 return false;
2794
 // Only uses of result number 0 are inspected; uses of other results are
 // ignored by this loop.
2795 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2796 UI != UE; ++UI)
2797 if (UI.getUse().get().getResNo() == 0 &&
2798 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2799 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2800 return false;
2801
2802 return true;
2803}
2804
2805/// getPreIndexedAddressParts - Returns true if the address of the load or
2806/// store node N can be legally represented as a pre-indexed address. On
2807/// success the base pointer, offset and addressing mode are set by reference.
///
/// Returns false when pre-increment is disabled via DisablePPCPreinc, when N
/// is neither a load nor a store, when a load is better served by a partial
/// vector load, when the memory type is a vector, or when neither the reg+reg
/// nor the reg+imm form can be selected. On success \p AM is ISD::PRE_INC.
2809 SDValue &Offset,
2811 SelectionDAG &DAG) const {
2812 if (DisablePPCPreinc) return false;
2813
2814 bool isLoad = true;
2815 SDValue Ptr;
2816 EVT VT;
 // Alignment is assigned in both branches below before it is read.
2817 unsigned Alignment;
2818 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2819 Ptr = LD->getBasePtr();
2820 VT = LD->getMemoryVT();
2821 Alignment = LD->getAlignment();
2822 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2823 Ptr = ST->getBasePtr();
2824 VT = ST->getMemoryVT();
2825 Alignment = ST->getAlignment();
2826 isLoad = false;
2827 } else
2828 return false;
2829
2830 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2831 // instructions because we can fold these into a more efficient instruction
2832 // instead, (such as LXSD).
2833 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2834 return false;
2835 }
2836
2837 // PowerPC doesn't have preinc load/store instructions for vectors
2838 if (VT.isVector())
2839 return false;
2840
2841 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2842 // Common code will reject creating a pre-inc form if the base pointer
2843 // is a frame index, or if N is a store and the base pointer is either
2844 // the same as or a predecessor of the value being stored. Check for
2845 // those situations here, and try with swapped Base/Offset instead.
2846 bool Swap = false;
2847
 // A RegisterSDNode base triggers the swap as well as a frame index.
2848 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2849 Swap = true;
2850 else if (!isLoad) {
2851 SDValue Val = cast<StoreSDNode>(N)->getValue();
2852 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2853 Swap = true;
2854 }
2855
2856 if (Swap)
2857 std::swap(Base, Offset);
2858
2859 AM = ISD::PRE_INC;
2860 return true;
2861 }
2862
2863 // LDU/STU can only handle immediates that are a multiple of 4.
 // That restriction is applied only in the i64 branch below (Align(4)); other
 // memory types pass None and so accept any 16-bit displacement.
2864 if (VT != MVT::i64) {
2865 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2866 return false;
2867 } else {
2868 // LDU/STU need an address with at least 4-byte alignment.
2869 if (Alignment < 4)
2870 return false;
2871
2872 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2873 return false;
2874 }
2875
2876 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2877 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2878 // sext i32 to i64 when addr mode is r+i.
2879 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2880 LD->getExtensionType() == ISD::SEXTLOAD &&
2882 return false;
2883 }
2884
2885 AM = ISD::PRE_INC;
2886 return true;
2887}
2888
2889//===----------------------------------------------------------------------===//
2890// LowerOperation implementation
2891//===----------------------------------------------------------------------===//
2892
2893/// Set \p HiOpFlags and \p LoOpFlags to the target MO flags to use when
2894/// referencing a label. The function returns void; both results are written
/// through the reference parameters.
///
/// \param IsPIC     Whether the PIC relocation model is in effect; the flag
///                  adjustment inside the `if (IsPIC)` block applies only then.
/// \param Subtarget The PowerPC subtarget.
/// \param GV        Optional global value being referenced (defaults to null).
2895static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2896 unsigned &HiOpFlags, unsigned &LoOpFlags,
2897 const GlobalValue *GV = nullptr) {
 // NOTE(review): listing lines 2898-2899 and 2903-2904, which assign the
 // flags, are absent from this excerpt.
2900
2901 // Don't use the pic base if not in PIC relocation model.
2902 if (IsPIC) {
2905 }
2906}
2907
 // Builds the address of a label as Hi(HiPart) + Lo(LoPart), both typed like
 // HiPart. When isPIC is set, the Hi node is first wrapped in an extra ADD
 // (see the comment at that step) before the final addition.
2909 SelectionDAG &DAG) {
2910 SDLoc DL(HiPart);
2911 EVT PtrVT = HiPart.getValueType();
2912 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2913
 // Both halves are built with a zero second operand.
2914 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2915 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2916
2917 // With PIC, the first instruction is actually "GR+hi(&G)".
2918 if (isPIC)
2919 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2921
2922 // Generate non-pic code that has direct accesses to the constant pool.
2923 // The address of the global is just (hi(&g)+lo(&g)).
 // This final ADD is emitted on both the PIC and the non-PIC path.
2924 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2925}
2926
 // Flags the function's PPCFunctionInfo by calling setUsesTOCBasePtr() on it.
 // NOTE(review): the declaration line (listing 2927) is absent from this
 // excerpt.
2928 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2929 FuncInfo->setUsesTOCBasePtr();
2930}
2931
2935
/// Builds a PPCISD::TOC_ENTRY memory-intrinsic node for the address \p GA.
///
/// The node takes \p GA and a base register as operands and produces a
/// pointer-sized value (i64 on PPC64, i32 otherwise) plus a chain. The base
/// is X2 on PPC64, R2 on 32-bit AIX, and a PPCISD::GlobalBaseReg node
/// otherwise.
2936SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2937 SDValue GA) const {
2938 const bool Is64Bit = Subtarget.isPPC64();
2939 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2940 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2941 : Subtarget.isAIXABI()
2942 ? DAG.getRegister(PPC::R2, VT)
2943 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2944 SDValue Ops[] = { GA, Reg };
 // NOTE(review): the trailing arguments of this call (listing lines
 // 2947-2948) are absent from this excerpt.
2945 return DAG.getMemIntrinsicNode(
2946 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2949}
2950
/// Lowers a constant-pool reference to a target-specific address.
///
/// - 64-bit ELF or AIX with PC-relative calls: a PPCISD::MAT_PCREL_ADDR node
///   over a target constant pool flagged PPCII::MO_PCREL_FLAG.
/// - 64-bit ELF or AIX otherwise: a TOC entry (the TOC base pointer is marked
///   as used).
/// - PIC on an SVR4 ABI: a TOC entry obtained through getTOCEntry.
/// - Everything else: a hi/lo pair combined by LowerLabelRef.
2951SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2952 SelectionDAG &DAG) const {
2953 EVT PtrVT = Op.getValueType();
2955 const Constant *C = CP->getConstVal();
2956
2957 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2958 // The actual address of the GlobalValue is stored in the TOC.
2959 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2960 if (Subtarget.isUsingPCRelativeCalls()) {
2961 SDLoc DL(CP);
2962 EVT Ty = getPointerTy(DAG.getDataLayout());
 // The PC-relative form carries the pool entry's own offset.
2963 SDValue ConstPool = DAG.getTargetConstantPool(
2964 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2965 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2966 }
2967 setUsesTOCBasePtr(DAG);
 // The TOC form is built with an explicit offset of 0.
2968 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2969 return getTOCEntry(DAG, SDLoc(CP), GA);
2970 }
2971
2972 unsigned MOHiFlag, MOLoFlag;
2973 bool IsPIC = isPositionIndependent();
2974 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2975
2976 if (IsPIC && Subtarget.isSVR4ABI()) {
2977 SDValue GA =
2979 return getTOCEntry(DAG, SDLoc(CP), GA);
2980 }
2981
 // Two references to the same constant, differing only in their MO flag.
2982 SDValue CPIHi =
2983 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2984 SDValue CPILo =
2985 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2986 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2987}
2988
2989// For 64-bit PowerPC, prefer the more compact relative encodings.
2990// This trades 32 bits per jump table entry for one or two instructions
2991// on the jump site.
2998
 // NOTE(review): the declaration and the leading condition of this function
 // (listing lines 2999-3000) and its final return (3004) are absent from this
 // excerpt. The visible part returns true for PPC64 and AIX subtargets.
3001 return false;
3002 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3003 return true;
3005}
3006
 // Chooses a relocation base node depending on the subtarget and code model.
 // NOTE(review): the declaration (listing 3007) and the statements at 3010,
 // 3015 and 3018 are absent from this excerpt, so only the structure is
 // visible: one path for subtargets that are not PPC64 or that are AIX, one
 // for the Small/Medium code models, and a default that builds a
 // PPCISD::GlobalBaseReg node.
3008 SelectionDAG &DAG) const {
3009 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3011
3012 switch (getTargetMachine().getCodeModel()) {
3013 case CodeModel::Small:
3014 case CodeModel::Medium:
3016 default:
3017 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3019 }
3020}
3021
/// Returns the MC expression used as the relocation base, chosen by subtarget
/// and code model. In the default code-model case this is a symbol reference
/// to the function's PIC base symbol.
// NOTE(review): the function-name line (listing 3023) and the statements at
// 3027 and 3032 are absent from this excerpt.
3022const MCExpr *
3024 unsigned JTI,
3025 MCContext &Ctx) const {
3026 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3028
3029 switch (getTargetMachine().getCodeModel()) {
3030 case CodeModel::Small:
3031 case CodeModel::Medium:
3033 default:
3034 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3035 }
3036}
3037
/// Lowers a jump-table reference to a target-specific address.
///
/// - PC-relative calls enabled: a target jump table flagged
///   PPCII::MO_PCREL_FLAG is materialized and returned as MatAddr.
/// - 64-bit ELF or AIX: a TOC entry (the TOC base pointer is marked as used).
/// - PIC on an SVR4 ABI: a TOC entry obtained through getTOCEntry.
/// - Everything else: a hi/lo pair combined by LowerLabelRef.
3038SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3039 EVT PtrVT = Op.getValueType();
 // NOTE(review): listing line 3040, which defines JT, is absent from this
 // excerpt.
3041
3042 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3043 if (Subtarget.isUsingPCRelativeCalls()) {
3044 SDLoc DL(JT);
3045 EVT Ty = getPointerTy(DAG.getDataLayout());
3046 SDValue GA =
3047 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3049 return MatAddr;
3050 }
3051
3052 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3053 // The actual address of the GlobalValue is stored in the TOC.
3054 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3055 setUsesTOCBasePtr(DAG);
3056 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3057 return getTOCEntry(DAG, SDLoc(JT), GA);
3058 }
3059
3060 unsigned MOHiFlag, MOLoFlag;
3061 bool IsPIC = isPositionIndependent();
3062 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3063
3064 if (IsPIC && Subtarget.isSVR4ABI()) {
3065 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
 // Unlike the other paths, the debug location here is taken from GA.
3067 return getTOCEntry(DAG, SDLoc(GA), GA);
3068 }
3069
 // Two references to the same jump table, differing only in their MO flag.
3070 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3071 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3072 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3073}
3074
/// Lowers a block-address reference to a target-specific address.
///
/// - PC-relative calls enabled: a target block address is materialized and
///   returned as MatAddr.
/// - 64-bit ELF or AIX: a TOC entry (the TOC base pointer is marked as used).
/// - 32-bit ELF in position-independent mode: a getTOCEntry result (the .got
///   slot mentioned in the comment below).
/// - Everything else: a hi/lo pair combined by LowerLabelRef.
3075SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3076 SelectionDAG &DAG) const {
3077 EVT PtrVT = Op.getValueType();
 // NOTE(review): listing line 3078, which defines BASDN, is absent from this
 // excerpt.
3079 const BlockAddress *BA = BASDN->getBlockAddress();
3080
3081 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3082 if (Subtarget.isUsingPCRelativeCalls()) {
3083 SDLoc DL(BASDN);
3084 EVT Ty = getPointerTy(DAG.getDataLayout());
3085 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3088 return MatAddr;
3089 }
3090
3091 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3092 // The actual BlockAddress is stored in the TOC.
3093 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3094 setUsesTOCBasePtr(DAG);
3095 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3096 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3097 }
3098
3099 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3100 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3101 return getTOCEntry(
3102 DAG, SDLoc(BASDN),
3103 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3104
3105 unsigned MOHiFlag, MOLoFlag;
3106 bool IsPIC = isPositionIndependent();
3107 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
 // NOTE(review): listing lines 3108-3109, which define TgtBAHi and TgtBALo,
 // are absent from this excerpt.
3110 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3111}
3112
/// Lowers a thread-local global address.
///
/// Emulated TLS is delegated to LowerToTLSEmulatedModel. Otherwise the code
/// sequence depends on the model returned by TM.getTLSModel(GV): LocalExec,
/// InitialExec, GeneralDynamic or LocalDynamic, each with a separate variant
/// when the subtarget uses PC-relative calls. Any other model reaches
/// llvm_unreachable.
// NOTE(review): many lines of this function are absent from this excerpt (the
// embedded listing numbers skip); the comments below describe only what is
// visible.
3113SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3114 SelectionDAG &DAG) const {
3115 // FIXME: TLS addresses currently use medium model code sequences,
3116 // which is the most useful form. Eventually support for small and
3117 // large models could be added if users need it, at the cost of
3118 // additional complexity.
3120 if (DAG.getTarget().useEmulatedTLS())
3121 return LowerToTLSEmulatedModel(GA, DAG);
3122
3123 SDLoc dl(GA);
3124 const GlobalValue *GV = GA->getGlobal();
3126 bool is64bit = Subtarget.isPPC64();
3127 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3128 PICLevel::Level picLevel = M->getPICLevel();
3129
3131 TLSModel::Model Model = TM.getTLSModel(GV);
3132
3133 if (Model == TLSModel::LocalExec) {
3134 if (Subtarget.isUsingPCRelativeCalls()) {
 // PC-relative local-exec: ADD_TLS of X13 and a materialized address.
3135 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3140 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3141 }
3142
3143 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3145 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
 // The register operand is X13 on 64-bit and R2 on 32-bit.
3147 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3148 : DAG.getRegister(PPC::R2, MVT::i32);
3149
 // The Hi node is applied to the register first; Lo is then applied to it.
3150 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3151 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3152 }
3153
3154 if (Model == TLSModel::InitialExec) {
3155 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3157 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3159 GV, dl, PtrVT, 0,
3160 IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3162 if (IsPCRel) {
 // The PC-relative variant loads TPOffset as an i64 from MatPCRel.
3164 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3166 } else {
3168 if (is64bit) {
3169 setUsesTOCBasePtr(DAG);
3170 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3171 GOTPtr =
3173 } else {
 // 32-bit: three cases, selected by position independence and PIC level.
3174 if (!TM.isPositionIndependent())
3176 else if (picLevel == PICLevel::SmallPIC)
3178 else
3180 }
3182 }
 // Both initial-exec variants finish with ADD_TLS of TPOffset and TGATLS.
3183 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3184 }
3185
3186 if (Model == TLSModel::GeneralDynamic) {
3187 if (Subtarget.isUsingPCRelativeCalls()) {
3188 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3191 }
3192
3193 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3195 if (is64bit) {
3196 setUsesTOCBasePtr(DAG);
3197 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3199 GOTReg, TGA);
3200 } else {
3203 else
3205 }
3206 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3207 GOTPtr, TGA, TGA);
3208 }
3209
3210 if (Model == TLSModel::LocalDynamic) {
3211 if (Subtarget.isUsingPCRelativeCalls()) {
3212 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3216 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3217 }
3218
3219 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3221 if (is64bit) {
3222 setUsesTOCBasePtr(DAG);
3223 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3225 GOTReg, TGA);
3226 } else {
3229 else
3231 }
3233 PtrVT, GOTPtr, TGA, TGA);
3235 PtrVT, TLSAddr, TGA);
3237 }
3238
3239 llvm_unreachable("Unknown TLS model!");
3240}
3241
/// Lowers a (non-TLS) global address to a target-specific address.
///
/// - 64-bit ELF or AIX with PC-relative calls: either the `Load` value on the
///   isAccessedAsGotIndirect path or a PPCISD::MAT_PCREL_ADDR node.
/// - 64-bit ELF or AIX otherwise: a TOC entry (the TOC base pointer is marked
///   as used).
/// - PIC on an SVR4 ABI: a TOC entry obtained through getTOCEntry.
/// - Everything else: a hi/lo pair combined by LowerLabelRef.
/// The global's offset (GSDN->getOffset()) is carried on the visible target
/// global address nodes.
3242SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3243 SelectionDAG &DAG) const {
3244 EVT PtrVT = Op.getValueType();
 // NOTE(review): listing line 3245, which defines GSDN, is absent from this
 // excerpt.
3246 SDLoc DL(GSDN);
3247 const GlobalValue *GV = GSDN->getGlobal();
3248
3249 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3250 // The actual address of the GlobalValue is stored in the TOC.
3251 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3252 if (Subtarget.isUsingPCRelativeCalls()) {
3253 EVT Ty = getPointerTy(DAG.getDataLayout());
3254 if (isAccessedAsGotIndirect(Op)) {
3255 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3261 return Load;
3262 } else {
3263 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3265 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3266 }
3267 }
3268 setUsesTOCBasePtr(DAG);
3269 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3270 return getTOCEntry(DAG, DL, GA);
3271 }
3272
3273 unsigned MOHiFlag, MOLoFlag;
3274 bool IsPIC = isPositionIndependent();
 // GV is passed to getLabelAccessInfo here.
3275 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3276
3277 if (IsPIC && Subtarget.isSVR4ABI()) {
3279 GSDN->getOffset(),
3281 return getTOCEntry(DAG, DL, GA);
3282 }
3283
 // Two references to the same global, differing only in their MO flag.
3284 SDValue GAHi =
3285 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3286 SDValue GALo =
3287 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3288
3289 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3290}
3291
3292SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3293 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3294 SDLoc dl(Op);
3295
3296 if (Op.getValueType() == MVT::v2i64) {
3297 // When the operands themselves are v2i64 values, we need to do something
3298 // special because VSX has no underlying comparison operations for these.
3299 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3300 // Equality can be handled by casting to the legal type for Altivec
3301 // comparisons, everything else needs to be expanded.
3302 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3303 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3304 DAG.getSetCC(dl, MVT::v4i32,
3305 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3306 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3307 CC));
3308 }
3309
3310 return SDValue();
3311 }
3312
3313 // We handle most of these in the usual way.
3314 return Op;
3315 }
3316
3317 // If we're comparing for equality to zero, expose the fact that this is
3318 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3319 // fold the new nodes.
3320 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3321 return V;
3322
3323 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3324 // Leave comparisons against 0 and -1 alone for now, since they're usually
3325 // optimized. FIXME: revisit this when we can custom lower all setcc
3326 // optimizations.
3327 if (C->isAllOnesValue() || C->isNullValue())
3328 return SDValue();
3329 }
3330
3331 // If we have an integer seteq/setne, turn it into a compare against zero
3332 // by xor'ing the rhs with the lhs, which is faster than setting a
3333 // condition register, reading it back out, and masking the correct bit. The
3334 // normal approach here uses sub to do this instead of xor. Using xor exposes
3335 // the result to other bit-twiddling opportunities.
3336 EVT LHSVT = Op.getOperand(0).getValueType();
3337 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3338 EVT VT = Op.getValueType();
3339 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3340 Op.getOperand(1));
3341 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3342 }
3343 return SDValue();
3344}
3345
/// Lowers a VAARG node (PPC32 only; asserted below).
///
/// Operand 0 is the incoming chain, operand 1 the va_list pointer and operand
/// 2 the source value. The visible code reads the gpr/fpr indices and the two
/// area pointers, picks one of the two areas based on whether the relevant
/// index is below 8, and finally loads a value of type VT from `Result`.
// NOTE(review): many lines of this function are absent from this excerpt (the
// embedded listing numbers skip); the comments below describe only what is
// visible.
3346SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3347 SDNode *Node = Op.getNode();
3348 EVT VT = Node->getValueType(0);
3350 SDValue InChain = Node->getOperand(0);
3351 SDValue VAListPtr = Node->getOperand(1);
3352 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3353 SDLoc dl(Node);
3354
3355 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3356
3357 // gpr_index
3360 InChain = GprIndex.getValue(1);
3361
3362 if (VT == MVT::i64) {
3363 // Check if GprIndex is even
3365 DAG.getConstant(1, dl, MVT::i32));
 // CC64 is set when the masked value is non-zero.
3366 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3367 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3369 DAG.getConstant(1, dl, MVT::i32));
3370 // Align GprIndex to be even if it isn't
3372 GprIndex);
3373 }
3374
3375 // fpr index is 1 byte after gpr
3377 DAG.getConstant(1, dl, MVT::i32));
3378
3379 // fpr
3382 InChain = FprIndex.getValue(1);
3383
3385 DAG.getConstant(8, dl, MVT::i32));
3386
3388 DAG.getConstant(4, dl, MVT::i32));
3389
3390 // areas
3393 InChain = OverflowArea.getValue(1);
3394
3397 InChain = RegSaveArea.getValue(1);
3398
3399 // select overflow_area if index > 8
 // CC itself is the opposite test: it is true while the relevant index
 // (gpr for integer types, fpr otherwise) is still below 8.
3400 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3401 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3402
3403 // adjustment constant gpr_index * 4/8
3405 VT.isInteger() ? GprIndex : FprIndex,
3406 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3407 MVT::i32));
3408
3409 // OurReg = RegSaveArea + RegConstant
3411 RegConstant);
3412
3413 // Floating types are 32 bytes into RegSaveArea
3414 if (VT.isFloatingPoint())
3415 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3416 DAG.getConstant(32, dl, MVT::i32));
3417
3418 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3420 VT.isInteger() ? GprIndex : FprIndex,
3421 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3422 MVT::i32));
3423
 // The index is accessed as a single byte (MVT::i8) at VAListPtr for integer
 // types and at FprPtr otherwise.
3425 VT.isInteger() ? VAListPtr : FprPtr,
3426 MachinePointerInfo(SV), MVT::i8);
3427
3428 // determine if we should load from reg_save_area or overflow_area
3430
3431 // increase overflow_area by 4/8 if gpr/fpr > 8
3433 DAG.getConstant(VT.isInteger() ? 4 : 8,
3434 dl, MVT::i32));
3435
3438
3441
 // The final load carries no pointer info (default MachinePointerInfo).
3442 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3443}
3444
/// Lowers a VACOPY node (PPC32 only; asserted below) to a 12-byte memcpy.
///
/// Operand 0 of \p Op is passed as the chain and operands 1 and 2 as the two
/// pointers of the copy; the copy is emitted with Align(8).
3445SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3446 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3447
3448 // We have to copy the entire va_list struct:
3449 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3450 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3451 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3452 false, true, false, MachinePointerInfo(),
 // NOTE(review): the final argument line (listing 3453) is absent from this
 // excerpt.
3454}
3455
3456SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3457 SelectionDAG &DAG) const {
3458 if (Subtarget.isAIXABI())
3459 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3460
3461 return Op.getOperand(0);
3462}
3463
/// Lowers INIT_TRAMPOLINE to a call to the external symbol
/// `__trampoline_setup`.
///
/// The call receives four pointer-sized integer arguments, in order: the
/// trampoline, its size (48 when the pointer type is i64, otherwise 40), the
/// nested function, and the 'nest' parameter value. The second element of the
/// LowerCallTo result pair is returned. The operation is not supported on AIX
/// and aborts with a fatal error there.
3464SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3465 SelectionDAG &DAG) const {
3466 if (Subtarget.isAIXABI())
3467 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3468
3469 SDValue Chain = Op.getOperand(0);
3470 SDValue Trmp = Op.getOperand(1); // trampoline
3471 SDValue FPtr = Op.getOperand(2); // nested function
3472 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3473 SDLoc dl(Op);
3474
 // NOTE(review): listing line 3475, which defines PtrVT, and lines 3479-3480,
 // which declare Args and Entry, are absent from this excerpt.
3476 bool isPPC64 = (PtrVT == MVT::i64);
3477 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3478
3481
 // Entry.Ty is set once; the same Entry object is reused for all four
 // arguments with only its Node changed.
3482 Entry.Ty = IntPtrTy;
3483 Entry.Node = Trmp; Args.push_back(Entry);
3484
3485 // TrampSize == (isPPC64 ? 48 : 40);
3486 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3487 isPPC64 ? MVT::i64 : MVT::i32);
3488 Args.push_back(Entry);
3489
3490 Entry.Node = FPtr; Args.push_back(Entry);
3491 Entry.Node = Nest; Args.push_back(Entry);
3492
3493 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3495 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3497 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3498
3499 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3500 return CallResult.second;
3501}
3502
/// Lowers a VASTART node.
///
/// On PPC64 and AIX this is a single store of `FR` to the va_list pointer
/// (operand 1). For the 32-bit SVR4 ABI the va_list struct described below is
/// filled in field by field with a chain of stores, the last of which is
/// returned.
// NOTE(review): several lines of this function are absent from this excerpt
// (the embedded listing numbers skip); the comments below describe only what
// is visible.
3503SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3505 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3507
3508 SDLoc dl(Op);
3509
3510 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3511 // vastart just stores the address of the VarArgsFrameIndex slot into the
3512 // memory location argument.
3514 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3515 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3516 MachinePointerInfo(SV));
3517 }
3518
3519 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3520 // We suppose the given va_list is already allocated.
3521 //
3522 // typedef struct {
3523 // char gpr; /* index into the array of 8 GPRs
3524 // * stored in the register save area
3525 // * gpr=0 corresponds to r3,
3526 // * gpr=1 to r4, etc.
3527 // */
3528 // char fpr; /* index into the array of 8 FPRs
3529 // * stored in the register save area
3530 // * fpr=0 corresponds to f1,
3531 // * fpr=1 to f2, etc.
3532 // */
3533 // char *overflow_arg_area;
3534 // /* location on stack that holds
3535 // * the next overflow argument
3536 // */
3537 // char *reg_save_area;
3538 // /* where r3:r10 and f1:f8 (if saved)
3539 // * are stored
3540 // */
3541 // } va_list[1];
3542
 // Initial values of the two one-byte index fields.
3543 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3544 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3546 PtrVT);
3547 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3548 PtrVT);
3549
 // Byte distances used to step through the struct: FrameOffset is the pointer
 // size in bytes, StackOffset is one byte less, and FPROffset is 1.
3550 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3551 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3552
3553 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3555
3556 uint64_t FPROffset = 1;
3557 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3558
3559 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3560
3561 // Store first byte : number of int regs
3563 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3565 uint64_t nextOffset = FPROffset;
3566 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3568
3569 // Store second byte : number of float regs
3575
3576 // Store second word : arguments given on stack
3579 nextOffset += FrameOffset;
3581
3582 // Store third word : arguments given in registers
3583 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3585}
3586
3587/// FPR - The set of FP registers that should be allocated for arguments
3588/// on Darwin and AIX.
3589static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3590 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3591 PPC::F11, PPC::F12, PPC::F13};
3592
3593/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3594/// the stack.
///
/// \param ArgVT       Value type of the argument; its store size is the
///                    starting size.
/// \param Flags       Argument flags; for byval arguments the byval size
///                    replaces the store size.
/// \param PtrByteSize Pointer size in bytes, used for the rounding step.
/// \returns The size in bytes.
3595static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3596 unsigned PtrByteSize) {
3597 unsigned ArgSize = ArgVT.getStoreSize();
3598 if (Flags.isByVal())
3599 ArgSize = Flags.getByValSize();
3600
3601 // Round up to multiples of the pointer size, except for array members,
3602 // which are always packed.
 // NOTE(review): the rounding statement itself (listing 3604) is absent from
 // this excerpt.
3603 if (!Flags.isInConsecutiveRegs())
3605
3606 return ArgSize;
3607}
3608
3609/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3610/// on the stack.
///
/// The result starts at \p PtrByteSize and is then overridden, in order, by:
/// 16 bytes for the listed vector types and f128; the requested byval
/// alignment when it exceeds the pointer size; and, for arguments passed in
/// consecutive registers, the store size of either the original or the
/// argument type. Later rules take precedence over earlier ones.
3612 ISD::ArgFlagsTy Flags,
3613 unsigned PtrByteSize) {
3614 Align Alignment(PtrByteSize);
3615
3616 // Altivec parameters are padded to a 16 byte boundary.
3617 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3618 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3619 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3620 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3621 Alignment = Align(16);
3622
3623 // ByVal parameters are aligned as requested.
3624 if (Flags.isByVal()) {
3625 auto BVAlign = Flags.getNonZeroByValAlign();
 // A byval alignment no larger than the pointer size leaves Alignment
 // unchanged.
3626 if (BVAlign > PtrByteSize) {
3627 if (BVAlign.value() % PtrByteSize != 0)
3629 "ByVal alignment is not a multiple of the pointer size");
3630
3631 Alignment = BVAlign;
3632 }
3633 }
3634
3635 // Array members are always packed to their original alignment.
3636 if (Flags.isInConsecutiveRegs()) {
3637 // If the array member was split into multiple registers, the first
3638 // needs to be aligned to the size of the full type. (Except for
3639 // ppcf128, which is only aligned as its f64 components.)
3640 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3641 Alignment = Align(OrigVT.getStoreSize());
3642 else
3643 Alignment = Align(ArgVT.getStoreSize());
3644 }
3645
3646 return Alignment;
3647}
3648
3649/// CalculateStackSlotUsed - Return whether this argument will use its
3650/// stack slot (instead of being passed in registers). ArgOffset,
3651/// AvailableFPRs, and AvailableVRs must hold the current argument
3652/// position, and will be updated to account for this argument.
3654 unsigned PtrByteSize, unsigned LinkageSize,
3655 unsigned ParamAreaSize, unsigned &ArgOffset,
3656 unsigned &AvailableFPRs,
3657 unsigned &AvailableVRs) {
3658 bool UseMemory = false;
3659
3660 // Respect alignment of argument on the stack.
3661 Align Alignment =
3663 ArgOffset = alignTo(ArgOffset, Alignment);
3664 // If there's no space left in the argument save area, we must
3665 // use memory (this check also catches zero-sized arguments).
3666 if (ArgOffset >= LinkageSize + ParamAreaSize)
3667 UseMemory = true;
3668
3669 // Allocate argument on the stack.
3671 if (Flags.isInConsecutiveRegsLast())
3673 // If we overran the argument save area, we must use memory
3674 // (this check catches arguments passed partially in memory)
3675 if (ArgOffset > LinkageSize + ParamAreaSize)
3676 UseMemory = true;
3677
3678 // However, if the argument is actually passed in an FPR or a VR,
3679 // we don't use memory after all.
3680 if (!Flags.isByVal()) {
3681 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3682 if (AvailableFPRs > 0) {
3683 --AvailableFPRs;
3684 return false;
3685 }
3686 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3687 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3688 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3689 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3690 if (AvailableVRs > 0) {
3691 --AvailableVRs;
3692 return false;
3693 }
3694 }
3695
3696 return UseMemory;
3697}
3698
3699/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3700/// ensure minimum alignment required for target.
3702 unsigned NumBytes) {
3703 return alignTo(NumBytes, Lowering->getStackAlign());
3704}
3705
3706SDValue PPCTargetLowering::LowerFormalArguments(
3707 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3708 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3709 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3710 if (Subtarget.isAIXABI())
3711 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3712 InVals);
3713 if (Subtarget.is64BitELFABI())
3714 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3715 InVals);
3716 assert(Subtarget.is32BitELFABI());
3717 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3718 InVals);
3719}
3720
3721SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3722 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3723 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3724 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3725
3726 // 32-bit SVR4 ABI Stack Frame Layout:
3727 // +-----------------------------------+
3728 // +--> | Back chain |
3729 // | +-----------------------------------+
3730 // | | Floating-point register save area |
3731 // | +-----------------------------------+
3732 // | | General register save area |
3733 // | +-----------------------------------+
3734 // | | CR save word |
3735 // | +-----------------------------------+
3736 // | | VRSAVE save word |
3737 // | +-----------------------------------+
3738 // | | Alignment padding |
3739 // | +-----------------------------------+
3740 // | | Vector register save area |
3741 // | +-----------------------------------+
3742 // | | Local variable space |
3743 // | +-----------------------------------+
3744 // | | Parameter list area |
3745 // | +-----------------------------------+
3746 // | | LR save word |
3747 // | +-----------------------------------+
3748 // SP--> +--- | Back chain |
3749 // +-----------------------------------+
3750 //
3751 // Specifications:
3752 // System V Application Binary Interface PowerPC Processor Supplement
3753 // AltiVec Technology Programming Interface Manual
3754
3756 MachineFrameInfo &MFI = MF.getFrameInfo();
3757 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3758
3760 // Potential tail calls could cause overwriting of argument stack slots.
3761 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3762 (CallConv == CallingConv::Fast));
3763 const Align PtrAlign(4);
3764
3765 // Assign locations to all of the incoming arguments.
3767 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3768 *DAG.getContext());
3769
3770 // Reserve space for the linkage area on the stack.
3771 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3772 CCInfo.AllocateStack(LinkageSize, PtrAlign);
3773 if (useSoftFloat())
3774 CCInfo.PreAnalyzeFormalArguments(Ins);
3775
3776 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3777 CCInfo.clearWasPPCF128();
3778
3779 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3780 CCValAssign &VA = ArgLocs[i];
3781
3782 // Arguments stored in registers.
3783 if (VA.isRegLoc()) {
3784 const TargetRegisterClass *RC;
3785 EVT ValVT = VA.getValVT();
3786
3787 switch (ValVT.getSimpleVT().SimpleTy) {
3788 default:
3789 llvm_unreachable("ValVT not supported by formal arguments Lowering");
3790 case MVT::i1:
3791 case MVT::i32:
3792 RC = &PPC::GPRCRegClass;
3793 break;
3794 case MVT::f32:
3795 if (Subtarget.hasP8Vector())
3796 RC = &PPC::VSSRCRegClass;
3797 else if (Subtarget.hasSPE())
3798 RC = &PPC::GPRCRegClass;
3799 else
3800 RC = &PPC::F4RCRegClass;
3801 break;
3802 case MVT::f64:
3803 if (Subtarget.hasVSX())
3804 RC = &PPC::VSFRCRegClass;
3805 else if (Subtarget.hasSPE())
3806 // SPE passes doubles in GPR pairs.
3807 RC = &PPC::GPRCRegClass;
3808 else
3809 RC = &PPC::F8RCRegClass;
3810 break;
3811 case MVT::v16i8:
3812 case MVT::v8i16:
3813 case MVT::v4i32:
3814 RC = &PPC::VRRCRegClass;
3815 break;
3816 case MVT::v4f32:
3817 RC = &PPC::VRRCRegClass;
3818 break;
3819 case MVT::v2f64:
3820 case MVT::v2i64:
3821 RC = &PPC::VRRCRegClass;
3822 break;
3823 }
3824
3825 SDValue ArgValue;
3826 // Transform the arguments stored in physical registers into
3827 // virtual ones.
3828 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3829 assert(i + 1 < e && "No second half of double precision argument");
3830 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3831 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3832 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3833 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3834 if (!Subtarget.isLittleEndian())
3836 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3837 ArgValueHi);
3838 } else {
3839 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3840 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3841 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3842 if (ValVT == MVT::i1)
3843 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3844 }
3845
3846 InVals.push_back(ArgValue);
3847 } else {
3848 // Argument stored in memory.
3849 assert(VA.isMemLoc());
3850
3851 // Get the extended size of the argument type in stack
3852 unsigned ArgSize = VA.getLocVT().getStoreSize();
3853 // Get the actual size of the argument type
3854 unsigned ObjSize = VA.getValVT().getStoreSize();
3855 unsigned ArgOffset = VA.getLocMemOffset();
3856 // Stack objects in PPC32 are right justified.
3858 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3859
3860 // Create load nodes to retrieve arguments from the stack.
3861 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3862 InVals.push_back(
3863 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3864 }
3865 }
3866
3867 // Assign locations to all of the incoming aggregate by value arguments.
3868 // Aggregates passed by value are stored in the local variable space of the
3869 // caller's stack frame, right above the parameter list area.
3871 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3872 ByValArgLocs, *DAG.getContext());
3873
3874 // Reserve stack space for the allocations in CCInfo.
3875 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3876
3877 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3878
3879 // Area that is at least reserved in the caller of this function.
3880 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3881 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3882
3883 // Set the size that is at least reserved in caller of this function. Tail
3884 // call optimized function's reserved stack space needs to be aligned so that
3885 // taking the difference between two stack areas will result in an aligned
3886 // stack.
3887 MinReservedArea =
3888 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3889 FuncInfo->setMinReservedArea(MinReservedArea);
3890
3892
3893 // If the function takes variable number of arguments, make a frame index for
3894 // the start of the first vararg value... for expansion of llvm.va_start.
3895 if (isVarArg) {
3896 static const MCPhysReg GPArgRegs[] = {
3897 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3898 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3899 };
3900 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3901
3902 static const MCPhysReg FPArgRegs[] = {
3903 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3904 PPC::F8
3905 };
3907
3908 if (useSoftFloat() || hasSPE())
3909 NumFPArgRegs = 0;
3910
3911 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3912 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3913
3914 // Make room for NumGPArgRegs and NumFPArgRegs.
3915 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3917
3918 FuncInfo->setVarArgsStackOffset(
3919 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3920 CCInfo.getNextStackOffset(), true));
3921
3922 FuncInfo->setVarArgsFrameIndex(
3923 MFI.CreateStackObject(Depth, Align(8), false));
3925
3926 // The fixed integer arguments of a variadic function are stored to the
3927 // VarArgsFrameIndex on the stack so that they may be loaded by
3928 // dereferencing the result of va_next.
3929 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3930 // Get an existing live-in vreg, or add a new one.
3931 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3932 if (!VReg)
3933 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3934
3935 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3936 SDValue Store =
3937 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3938 MemOps.push_back(Store);
3939 // Increment the address by four for the next argument to store
3940 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3941 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3942 }
3943
3944 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3945 // is set.
3946 // The double arguments are stored to the VarArgsFrameIndex
3947 // on the stack.
3948 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3949 // Get an existing live-in vreg, or add a new one.
3950 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3951 if (!VReg)
3952 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3953
3954 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3955 SDValue Store =
3956 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3957 MemOps.push_back(Store);
3958 // Increment the address by eight for the next argument to store
3959 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3960 PtrVT);
3961 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3962 }
3963 }
3964
3965 if (!MemOps.empty())
3966 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3967
3968 return Chain;
3969}
3970
3971// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3972// value to MVT::i64 and then truncate to the correct register size.
3973SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3976 const SDLoc &dl) const {
3977 if (Flags.isSExt())
3979 DAG.getValueType(ObjectVT));
3980 else if (Flags.isZExt())
3982 DAG.getValueType(ObjectVT));
3983
3984 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3985}
3986
3987SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3988 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3989 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3990 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3991 // TODO: add description of PPC stack frame format, or at least some docs.
3992 //
3993 bool isELFv2ABI = Subtarget.isELFv2ABI();
3994 bool isLittleEndian = Subtarget.isLittleEndian();
3996 MachineFrameInfo &MFI = MF.getFrameInfo();
3997 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3998
3999 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4000 "fastcc not supported on varargs functions");
4001
4003 // Potential tail calls could cause overwriting of argument stack slots.
4004 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4005 (CallConv == CallingConv::Fast));
4006 unsigned PtrByteSize = 8;
4007 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4008
4009 static const MCPhysReg GPR[] = {
4010 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4011 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4012 };
4013 static const MCPhysReg VR[] = {
4014 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4015 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4016 };
4017
4018 const unsigned Num_GPR_Regs = array_lengthof(GPR);
4019 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4020 const unsigned Num_VR_Regs = array_lengthof(VR);
4021
4022 // Do a first pass over the arguments to determine whether the ABI
4023 // guarantees that our caller has allocated the parameter save area
4024 // on its stack frame. In the ELFv1 ABI, this is always the case;
4025 // in the ELFv2 ABI, it is true if this is a vararg function or if
4026 // any parameter is located in a stack slot.
4027
4028 bool HasParameterArea = !isELFv2ABI || isVarArg;
4030 unsigned NumBytes = LinkageSize;
4031 unsigned AvailableFPRs = Num_FPR_Regs;
4032 unsigned AvailableVRs = Num_VR_Regs;
4033 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4034 if (Ins[i].Flags.isNest())
4035 continue;
4036
4037 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4038 PtrByteSize, LinkageSize, ParamAreaSize,
4039 NumBytes, AvailableFPRs, AvailableVRs))
4040 HasParameterArea = true;
4041 }
4042
4043 // Add DAG nodes to load the arguments or copy them out of registers. On
4044 // entry to a function on PPC, the arguments start after the linkage area,
4045 // although the first ones are often in registers.
4046
4047 unsigned ArgOffset = LinkageSize;
4048 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4051 unsigned CurArgIdx = 0;
4052 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4054 bool needsLoad = false;
4055 EVT ObjectVT = Ins[ArgNo].VT;
4056 EVT OrigVT = Ins[ArgNo].ArgVT;
4057 unsigned ObjSize = ObjectVT.getStoreSize();
4058 unsigned ArgSize = ObjSize;
4059 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4060 if (Ins[ArgNo].isOrigArg()) {
4061 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4062 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4063 }
4064 // We re-align the argument offset for each argument, except when using the
4065 // fast calling convention, when we need to make sure we do that only when
4066 // we'll actually use a stack slot.
4067 unsigned CurArgOffset;
4068 Align Alignment;
4069 auto ComputeArgOffset = [&]() {
4070 /* Respect alignment of argument on the stack. */
4071 Alignment =
4073 ArgOffset = alignTo(ArgOffset, Alignment);
4075 };
4076
4077 if (CallConv != CallingConv::Fast) {
4079
4080 /* Compute GPR index associated with argument offset. */
4081 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4082 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4083 }
4084
4085 // FIXME the codegen can be much improved in some cases.
4086 // We do not have to keep everything in memory.
4087 if (Flags.isByVal()) {
4088 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4089
4090 if (CallConv == CallingConv::Fast)
4092
4093 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4094 ObjSize = Flags.getByValSize();
4096 // Empty aggregate parameters do not take up registers. Examples:
4097 // struct { } a;
4098 // union { } b;
4099 // int c[0];
4100 // etc. However, we have to provide a place-holder in InVals, so
4101 // pretend we have an 8-byte item at the current address for that
4102 // purpose.
4103 if (!ObjSize) {
4104 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4105 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4106 InVals.push_back(FIN);
4107 continue;
4108 }
4109
4110 // Create a stack object covering all stack doublewords occupied
4111 // by the argument. If the argument is (fully or partially) on
4112 // the stack, or if the argument is fully in registers but the
4113 // caller has allocated the parameter save anyway, we can refer
4114 // directly to the caller's stack frame. Otherwise, create a
4115 // local copy in our own frame.
4116 int FI;
4117 if (HasParameterArea ||
4118 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4119 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4120 else
4121 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4122 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4123
4124 // Handle aggregates smaller than 8 bytes.
4125 if (ObjSize < PtrByteSize) {
4126 // The value of the object is its address, which differs from the
4127 // address of the enclosing doubleword on big-endian systems.
4128 SDValue Arg = FIN;
4129 if (!isLittleEndian) {
4131 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4132 }
4133 InVals.push_back(Arg);
4134
4135 if (GPR_idx != Num_GPR_Regs) {
4136 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4137 FuncInfo->addLiveInAttr(VReg, Flags);
4138 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4139 SDValue Store;
4140
4141 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4142 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4143 (ObjSize == 2 ? MVT::i16 : MVT::i32));
4144 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4146 } else {
4147 // For sizes that don't fit a truncating store (3, 5, 6, 7),
4148 // store the whole register as-is to the parameter save area
4149 // slot.
4150 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4152 }
4153
4154 MemOps.push_back(Store);
4155 }
4156 // Whether we copied from a register or not, advance the offset
4157 // into the parameter save area by a full doubleword.
4159 continue;
4160 }
4161
4162 // The value of the object is its address, which is the address of
4163 // its first stack doubleword.
4164 InVals.push_back(FIN);
4165
4166 // Store whatever pieces of the object are in registers to memory.
4167 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4168 if (GPR_idx == Num_GPR_Regs)
4169 break;
4170
4171 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4172 FuncInfo->addLiveInAttr(VReg, Flags);
4173 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4174 SDValue Addr = FIN;
4175 if (j) {
4176 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4177 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4178 }
4179 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4181 MemOps.push_back(Store);
4182 ++GPR_idx;
4183 }
4184 ArgOffset += ArgSize;
4185 continue;
4186 }
4187
4188 switch (ObjectVT.getSimpleVT().SimpleTy) {
4189 default: llvm_unreachable("Unhandled argument type!");
4190 case MVT::i1:
4191 case MVT::i32:
4192 case MVT::i64:
4193 if (Flags.isNest()) {
4194 // The 'nest' parameter, if any, is passed in R11.
4195 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4196 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4197
4198 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4199 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4200
4201 break;
4202 }
4203
4204 // These can be scalar arguments or elements of an integer array type
4205 // passed directly. Clang may use those instead of "byval" aggregate
4206 // types to avoid forcing arguments to memory unnecessarily.
4207 if (GPR_idx != Num_GPR_Regs) {
4208 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4209 FuncInfo->addLiveInAttr(VReg, Flags);
4210 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4211
4212 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4213 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4214 // value to MVT::i64 and then truncate to the correct register size.
4215 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4216 } else {
4217 if (CallConv == CallingConv::Fast)
4219
4220 needsLoad = true;
4222 }
4223 if (CallConv != CallingConv::Fast || needsLoad)
4224 ArgOffset += 8;
4225 break;
4226
4227 case MVT::f32:
4228 case MVT::f64:
4229 // These can be scalar arguments or elements of a float array type
4230 // passed directly. The latter are used to implement ELFv2 homogeneous
4231 // float aggregates.
4232 if (FPR_idx != Num_FPR_Regs) {
4233 unsigned VReg;
4234
4235 if (ObjectVT == MVT::f32)
4236 VReg = MF.addLiveIn(FPR[FPR_idx],
4237 Subtarget.hasP8Vector()
4238 ? &PPC::VSSRCRegClass
4239 : &PPC::F4RCRegClass);
4240 else
4241 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4242 ? &PPC::VSFRCRegClass
4243 : &PPC::F8RCRegClass);
4244
4245 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4246 ++FPR_idx;
4247 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4248 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4249 // once we support fp <-> gpr moves.
4250
4251 // This can only ever happen in the presence of f32 array types,
4252 // since otherwise we never run out of FPRs before running out
4253 // of GPRs.
4254 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4255 FuncInfo->addLiveInAttr(VReg, Flags);
4256 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4257
4258 if (ObjectVT == MVT::f32) {
4259 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4260 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4261 DAG.getConstant(32, dl, MVT::i32));
4263 }
4264
4266 } else {
4267 if (CallConv == CallingConv::Fast)
4269
4270 needsLoad = true;
4271 }
4272
4273 // When passing an array of floats, the array occupies consecutive
4274 // space in the argument area; only round up to the next doubleword
4275 // at the end of the array. Otherwise, each float takes 8 bytes.
4276 if (CallConv != CallingConv::Fast || needsLoad) {
4277 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4278 ArgOffset += ArgSize;
4279 if (Flags.isInConsecutiveRegsLast())
4281 }
4282 break;
4283 case MVT::v4f32:
4284 case MVT::v4i32:
4285 case MVT::v8i16:
4286 case MVT::v16i8:
4287 case MVT::v2f64:
4288 case MVT::v2i64:
4289 case MVT::v1i128:
4290 case MVT::f128:
4291 // These can be scalar arguments or elements of a vector array type
4292 // passed directly. The latter are used to implement ELFv2 homogeneous
4293 // vector aggregates.
4294 if (VR_idx != Num_VR_Regs) {
4295 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4296 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4297 ++VR_idx;
4298 } else {
4299 if (CallConv == CallingConv::Fast)
4301 needsLoad = true;
4302 }
4303 if (CallConv != CallingConv::Fast || needsLoad)
4304 ArgOffset += 16;
4305 break;
4306 }
4307
4308 // We need to load the argument to a virtual register if we determined
4309 // above that we ran out of physical registers of the appropriate type.
4310 if (needsLoad) {
4311 if (ObjSize < ArgSize && !isLittleEndian)
4313 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4314 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4315 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4316 }
4317
4318 InVals.push_back(ArgVal);
4319 }
4320
4321 // Area that is at least reserved in the caller of this function.
4322 unsigned MinReservedArea;
4323 if (HasParameterArea)
4324 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4325 else
4326 MinReservedArea = LinkageSize;
4327
4328 // Set the size that is at least reserved in caller of this function. Tail
4329 // call optimized functions' reserved stack space needs to be aligned so that
4330 // taking the difference between two stack areas will result in an aligned
4331 // stack.
4332 MinReservedArea =
4333 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4334 FuncInfo->setMinReservedArea(MinReservedArea);
4335
4336 // If the function takes variable number of arguments, make a frame index for
4337 // the start of the first vararg value... for expansion of llvm.va_start.
4338 // On ELFv2ABI spec, it writes:
4339 // C programs that are intended to be *portable* across different compilers
4340 // and architectures must use the header file <stdarg.h> to deal with variable
4341 // argument lists.
4342 if (isVarArg && MFI.hasVAStart()) {
4343 int Depth = ArgOffset;
4344
4345 FuncInfo->setVarArgsFrameIndex(
4346 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4348
4349 // If this function is vararg, store any remaining integer argument regs
4350 // to their spots on the stack so that they may be loaded by dereferencing
4351 // the result of va_next.
4352 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4354 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4355 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4356 SDValue Store =
4357 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4358 MemOps.push_back(Store);
4359 // Increment the address by four for the next argument to store
4361 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4362 }
4363 }
4364
4365 if (!MemOps.empty())
4366 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4367
4368 return Chain;
4369}
4370
4371/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4372/// adjusted to accommodate the arguments for the tailcall.
4373static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4374 unsigned ParamSize) {
4375
4376 if (!isTailCall) return 0;
4377
4380 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4381 // Remember only if the new adjustment is bigger.
4382 if (SPDiff < FI->getTailCallSPDelta())
4384
4385 return SPDiff;
4386}
4387
4389
4390static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4391 const TargetMachine &TM) {
4392 // It does not make sense to call callsShareTOCBase() with a caller that
4393 // is PC Relative since PC Relative callers do not have a TOC.
4394#ifndef NDEBUG
4395 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4396 assert(!STICaller->isUsingPCRelativeCalls() &&
4397 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4398#endif
4399
4400 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4401 // don't have enough information to determine if the caller and callee share
4402 // the same TOC base, so we have to pessimistically assume they don't for
4403 // correctness.
4405 if (!G)
4406 return false;
4407
4408 const GlobalValue *GV = G->getGlobal();
4409
4410 // If the callee is preemptable, then the static linker will use a plt-stub
4411 // which saves the toc to the stack, and needs a nop after the call
4412 // instruction to convert to a toc-restore.
4413 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4414 return false;
4415
4416 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4417 // We may need a TOC restore in the situation where the caller requires a
4418 // valid TOC but the callee is PC Relative and does not.
4419 const Function *F = dyn_cast<Function>(GV);
4420 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4421
4422 // If we have an Alias we can try to get the function from there.
4423 if (Alias) {
4424 const GlobalObject *GlobalObj = Alias->getBaseObject();
4426 }
4427
4428 // If we still have no valid function pointer we do not have enough
4429 // information to determine if the callee uses PC Relative calls so we must
4430 // assume that it does.
4431 if (!F)
4432 return false;
4433
4434 // If the callee uses PC Relative we cannot guarantee that the callee won't
4435 // clobber the TOC of the caller and so we must assume that the two
4436 // functions do not share a TOC base.
4437 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4438 if (STICallee->isUsingPCRelativeCalls())
4439 return false;
4440
4441 // If the GV is not a strong definition then we need to assume it can be
4442 // replaced by another function at link time. The function that replaces
4443 // it may not share the same TOC as the caller since the callee may be
4444 // replaced by a PC Relative version of the same function.
4445 if (!GV->isStrongDefinitionForLinker())
4446 return false;
4447
4448 // The medium and large code models are expected to provide a sufficiently
4449 // large TOC to provide all data addressing needs of a module with a
4450 // single TOC.
4451 if (CodeModel::Medium == TM.getCodeModel() ||
4452 CodeModel::Large == TM.getCodeModel())
4453 return true;
4454
4455 // Any explicitly-specified sections and section prefixes must also match.
4456 // Also, if we're using -ffunction-sections, then each function is always in
4457 // a different section (the same is true for COMDAT functions).
4458 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4459 GV->getSection() != Caller->getSection())
4460 return false;
4461 if (const auto *F = dyn_cast<Function>(GV)) {
4462 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4463 return false;
4464 }
4465
4466 return true;
4467}
4468
4469static bool
4471 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4472 assert(Subtarget.is64BitELFABI());
4473
4474 const unsigned PtrByteSize = 8;
4475 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4476
4477 static const MCPhysReg GPR[] = {
4478 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4479 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4480 };
4481 static const MCPhysReg VR[] = {
4482 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4483 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4484 };
4485
4486 const unsigned NumGPRs = array_lengthof(GPR);
4487 const unsigned NumFPRs = 13;
4488 const unsigned NumVRs = array_lengthof(VR);
4489 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4490
4491 unsigned NumBytes = LinkageSize;
4492 unsigned AvailableFPRs = NumFPRs;
4493 unsigned AvailableVRs = NumVRs;
4494
4495 for (const ISD::OutputArg& Param : Outs) {
4496 if (Param.Flags.isNest()) continue;
4497
4498 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4499 LinkageSize, ParamAreaSize, NumBytes,
4501 return true;
4502 }
4503 return false;
4504}
4505
4506static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4507 if (CB.arg_size() != CallerFn->arg_size())
4508 return false;
4509
4510 auto CalleeArgIter = CB.arg_begin();
4511 auto CalleeArgEnd = CB.arg_end();
4513
4515 const Value* CalleeArg = *CalleeArgIter;
4516 const Value* CallerArg = &(*CallerArgIter);
4517 if (CalleeArg == CallerArg)
4518 continue;
4519
4520 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4521 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4522 // }
4523 // 1st argument of callee is undef and has the same type as caller.
4524 if (CalleeArg->getType() == CallerArg->getType() &&
4526 continue;
4527
4528 return false;
4529 }
4530
4531 return true;
4532}
4533
4534// Returns true if TCO is possible between the caller's and callee's
4535// calling conventions.
4536static bool
4539 // Tail calls are possible with fastcc and ccc.
4540 auto isTailCallableCC = [] (CallingConv::ID CC){
4541 return CC == CallingConv::C || CC == CallingConv::Fast;
4542 };
4544 return false;
4545
4546 // We can safely tail call both fastcc and ccc callees from a c calling
4547 // convention caller. If the caller is fastcc, we may have less stack space
4548 // than a non-fastcc caller with the same signature so disable tail-calls in
4549 // that case.
4550 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4551}
4552
4553bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4554 SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4556 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4558
4559 if (DisableSCO && !TailCallOpt) return false;
4560
4561 // Variadic argument functions are not supported.
4562 if (isVarArg) return false;
4563
4564 auto &Caller = DAG.getMachineFunction().getFunction();
4565 // Check that the calling conventions are compatible for tco.
4566 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4567 return false;
4568
4569 // A caller that has any byval parameter is not supported.
4570 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4571 return false;
4572
4573 // A callee that takes any byval parameter is not supported either.
4574 // Note: This is a quick work around, because in some cases, e.g.
4575 // caller's stack size > callee's stack size, we are still able to apply
4576 // sibling call optimization. For example, gcc is able to do SCO for caller1
4577 // in the following example, but not for caller2.
4578 // struct test {
4579 // long int a;
4580 // char ary[56];
4581 // } gTest;
4582 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4583 // b->a = v.a;
4584 // return 0;
4585 // }
4586 // void caller1(struct test a, struct test c, struct test *b) {
4587 // callee(gTest, b); }
4588 // void caller2(struct test *b) { callee(gTest, b); }
4589 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4590 return false;
4591
4592 // If callee and caller use different calling conventions, we cannot pass
4593 // parameters on stack since offsets for the parameter area may be different.
4594 if (Caller.getCallingConv() != CalleeCC &&
4595 needStackSlotPassParameters(Subtarget, Outs))
4596 return false;
4597
4598 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4599 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4600 // callee potentially have different TOC bases then we cannot tail call since
4601 // we need to restore the TOC pointer after the call.
4602 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4603 // We cannot guarantee this for indirect calls or calls to external functions.
4604 // When PC-Relative addressing is used, the concept of the TOC is no longer
4605 // applicable so this check is not required.
4606 // Check first for indirect calls.
4607 if (!Subtarget.isUsingPCRelativeCalls() &&
4609 return false;
4610
4611 // Check if we share the TOC base.
4612 if (!Subtarget.isUsingPCRelativeCalls() &&
4614 return false;
4615
4616 // TCO allows altering callee ABI, so we don't have to check further.
4618 return true;
4619
4620 if (DisableSCO) return false;
4621
4622 // If callee use the same argument list that caller is using, then we can
4623 // apply SCO on this case. If it is not, then we need to check if callee needs
4624 // stack for passing arguments.
4625 // PC Relative tail calls may not have a CallBase.
4626 // If there is no CallBase we cannot verify if we have the same argument
4627 // list so assume that we don't have the same argument list.
4628 if (CB && !hasSameArgumentList(&Caller, *CB) &&
4629 needStackSlotPassParameters(Subtarget, Outs))
4630 return false;
4631 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4632 return false;
4633
4634 return true;
4635}
4636
4637/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4638/// for tail call optimization. Targets which want to do tail call
4639/// optimization should implement this function.
4640bool
4641PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4643 bool isVarArg,
4645 SelectionDAG& DAG) const {
4646 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4647 return false;
4648
4649 // Variable argument functions are not supported.
4650 if (isVarArg)
4651 return false;
4652
4656 // Functions containing byval parameters are not supported.
4657 for (unsigned i = 0; i != Ins.size(); i++) {
4658 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4659 if (Flags.isByVal()) return false;
4660 }
4661
4662 // Non-PIC/GOT tail calls are supported.
4663 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4664 return true;
4665
4666 // At the moment we can only do local tail calls (in same module, hidden
4667 // or protected) if we are generating PIC.
4669 return G->getGlobal()->hasHiddenVisibility()
4670 || G->getGlobal()->hasProtectedVisibility();
4671 }
4672
4673 return false;
4674}
4675
4676/// isCallCompatibleAddress - Return the immediate to use if the specified
4677/// 32-bit value is representable in the immediate field of a BxA instruction.
4680 if (!C) return nullptr;
4681
4682 int Addr = C->getZExtValue();
4683 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4685 return nullptr; // Top 6 bits have to be sext of immediate.
4686
4687 return DAG
4688 .getConstant(
4689 (int)C->getZExtValue() >> 2, SDLoc(Op),
4691 .getNode();
4692}
4693
4694namespace {
4695
/// Record of one outgoing argument that a tail call has to write to a stack
/// slot. Instances are filled in by CalculateTailCallArgDest() and consumed by
/// StoreTailCallArgumentsToStackSlot().
4696struct TailCallArgumentInfo {
/// The argument value to be stored.
4697 SDValue Arg;
/// Frame-index node for the slot; used as the address operand of the store.
4698 SDValue FrameIdxOp;
/// Frame index of the fixed stack object created for this argument.
4699 int FrameIdx = 0;
4700
4701 TailCallArgumentInfo() = default;
4702};
4703
4704} // end anonymous namespace
4705
4706/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4708 SelectionDAG &DAG, SDValue Chain,
4711 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4712 SDValue Arg = TailCallArgs[i].Arg;
4713 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4714 int FI = TailCallArgs[i].FrameIdx;
4715 // Store relative to framepointer.
4716 MemOpChains.push_back(DAG.getStore(
4717 Chain, dl, Arg, FIN,
4719 }
4720}
4721
4722/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4723/// the appropriate stack slot for the tail call optimized function call.
4726 int SPDiff, const SDLoc &dl) {
4727 if (SPDiff) {
4728 // Calculate the new stack slot for the return address.
4730 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4731 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4732 bool isPPC64 = Subtarget.isPPC64();
4733 int SlotSize = isPPC64 ? 8 : 4;
4734 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4735 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4736 NewRetAddrLoc, true);
4737 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4739 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4741 }
4742 return Chain;
4743}
4744
4745/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4746/// the position of the argument.
4747static void
4749 SDValue Arg, int SPDiff, unsigned ArgOffset,
4751 int Offset = ArgOffset + SPDiff;
4752 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4753 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4754 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4755 SDValue FIN = DAG.getFrameIndex(FI, VT);
4756 TailCallArgumentInfo Info;
4757 Info.Arg = Arg;
4758 Info.FrameIdxOp = FIN;
4759 Info.FrameIdx = FI;
4760 TailCallArguments.push_back(Info);
4761}
4762
4763/// EmitTailCallLoadFPAndRetAddr - Emit a load of the return address stack
4764/// slot. Returns the (possibly updated) chain as result and the loaded return
4765/// address in LROpOut. Used when tail calling.
///
/// Nothing is emitted when SPDiff is zero: Chain is returned unchanged and
/// LROpOut is left untouched. FPOpOut is never written by this function.
4766SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4767 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4768 SDValue &FPOpOut, const SDLoc &dl) const {
4769 if (SPDiff) {
4770 // Load the return address (LR) stack slot for later adjusting.
// The loaded value is i64 on PPC64 and i32 otherwise.
4771 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
// LROpOut first holds the address operand obtained from
// getReturnAddrFrameIndex(), then is overwritten with the load from it.
4772 LROpOut = getReturnAddrFrameIndex(DAG);
4773 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
// Value 1 of the load node is threaded through as the new chain.
4774 Chain = SDValue(LROpOut.getNode(), 1);
4775 }
4776 return Chain;
4777}
4778
4779/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4780/// by "Src" to address "Dst" of size "Size". Alignment information is
4781/// specified by the specific parameter attribute. The copy will be passed as
4782/// a byval function parameter.
4783/// Sometimes what we are copying is the end of a larger object, the part that
4784/// does not fit in registers.
4786 SDValue Chain, ISD::ArgFlagsTy Flags,
4787 SelectionDAG &DAG, const SDLoc &dl) {
4788 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4789 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
4790 Flags.getNonZeroByValAlign(), false, false, false,
4792}
4793
4794/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4795/// tail calls.
4798 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4799 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4802 if (!isTailCall) {
4803 if (isVector) {
4804 SDValue StackPtr;
4805 if (isPPC64)
4806 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4807 else
4808 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4809 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4810 DAG.getConstant(ArgOffset, dl, PtrVT));
4811 }
4812 MemOpChains.push_back(
4813 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4814 // Calculate and remember argument location.
4815 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4817}
4818
4819static void
4821 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4822 SDValue FPOp,
4824 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4825 // might overwrite each other in case of tail call optimization.
4827 // Do not flag preceding copytoreg stuff together with the following stuff.
4828 InFlag = SDValue();
4830 MemOpChains2, dl);
4831 if (!MemOpChains2.empty())
4833
4834 // Store the return address to the appropriate stack slot.
4835 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4836
4837 // Emit callseq_end just before tailcall node.
4838 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4839 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4840 InFlag = Chain.getValue(1);
4841}
4842
4843// Is this global address that of a function that can be called by name? (as
4844// opposed to something that must hold a descriptor for an indirect call).
4847 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4848 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4849 return false;
4850
4851 return G->getGlobal()->getValueType()->isFunctionTy();
4852 }
4853
4854 return false;
4855}
4856
4857SDValue PPCTargetLowering::LowerCallResult(
4858 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4859 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4860 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4862 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4863 *DAG.getContext());
4864
4865 CCRetInfo.AnalyzeCallResult(
4866 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
4868 : RetCC_PPC);
4869
4870 // Copy all of the result registers out of their specified physreg.
4871 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4872 CCValAssign &VA = RVLocs[i];
4873 assert(VA.isRegLoc() && "Can only return in registers!");
4874
4875 SDValue Val;
4876
4877 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
4878 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
4879 InFlag);
4880 Chain = Lo.getValue(1);
4881 InFlag = Lo.getValue(2);
4882 VA = RVLocs[++i]; // skip ahead to next loc
4883 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
4884 InFlag);
4885 Chain = Hi.getValue(1);
4886 InFlag = Hi.getValue(2);
4887 if (!Subtarget.isLittleEndian())
4888 std::swap (Lo, Hi);
4889 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
4890 } else {
4891 Val = DAG.getCopyFromReg(Chain, dl,
4892 VA.getLocReg(), VA.getLocVT(), InFlag);
4893 Chain = Val.getValue(1);
4894 InFlag = Val.getValue(2);
4895 }
4896
4897 switch (VA.getLocInfo()) {
4898 default: llvm_unreachable("Unknown loc info!");
4899 case CCValAssign::Full: break;
4900 case CCValAssign::AExt:
4901 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4902 break;
4903 case CCValAssign::ZExt:
4904 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4905 DAG.getValueType(VA.getValVT()));
4906 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4907 break;
4908 case CCValAssign::SExt:
4909 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4910 DAG.getValueType(VA.getValVT()));
4911 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4912 break;
4913 }
4914
4915 InVals.push_back(Val);
4916 }
4917
4918 return Chain;
4919}
4920
4922 const PPCSubtarget &Subtarget, bool isPatchPoint) {
4923 // PatchPoint calls are not indirect.
4924 if (isPatchPoint)
4925 return false;
4926
4928 return false;
4929
4930 // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
4931 // because the immediate function pointer points to a descriptor instead of
4932 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
4933 // pointer immediate points to the global entry point, while the BLA would
4934 // need to jump to the local entry point (see rL211174).
4935 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
4937 return false;
4938
4939 return true;
4940}
4941
4942// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
// Returns true for the AIX ABI, and for the 64-bit ELF ABI unless PC-relative
// calls are in use. buildCallOperands() uses this to decide whether an
// indirect call gets the extra TOC-restore address operand.
4943static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
4944 return Subtarget.isAIXABI() ||
4945 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
4946}
4947
4949 const Function &Caller,
4950 const SDValue &Callee,
4951 const PPCSubtarget &Subtarget,
4952 const TargetMachine &TM) {
4953 if (CFlags.IsTailCall)
4954 return PPCISD::TC_RETURN;
4955
4956 // This is a call through a function pointer.
4957 if (CFlags.IsIndirect) {
4958 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
4959 // indirect calls. The save of the caller's TOC pointer to the stack will be
4960 // inserted into the DAG as part of call lowering. The restore of the TOC
4961 // pointer is modeled by using a pseudo instruction for the call opcode that
4962 // represents the 2 instruction sequence of an indirect branch and link,
4963 // immediately followed by a load of the TOC pointer from the stack save
4964 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
4965 // as it is not saved or used.
4967 : PPCISD::BCTRL;
4968 }
4969
4970 if (Subtarget.isUsingPCRelativeCalls()) {
4971 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
4972 return PPCISD::CALL_NOTOC;
4973 }
4974
4975 // The ABIs that maintain a TOC pointer across calls need to have a nop
4976 // immediately following the call instruction if the caller and callee may
4977 // have different TOC bases. At link time if the linker determines the calls
4978 // may not share a TOC base, the call is redirected to a trampoline inserted
4979 // by the linker. The trampoline will (among other things) save the caller's
4980 // TOC pointer at an ABI designated offset in the linkage area and the linker
4981 // will rewrite the nop to be a load of the TOC pointer from the linkage area
4982 // into gpr2.
4983 if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
4984 return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
4986
4987 return PPCISD::CALL;
4988}
4989
4991 const SDLoc &dl, const PPCSubtarget &Subtarget) {
4992 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
4993 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
4994 return SDValue(Dest, 0);
4995
4996 // Returns true if the callee is local, and false otherwise.
4997 auto isLocalCallee = [&]() {
5000 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5001
5002 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5004 };
5005
5006 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5007 // a static relocation model causes some versions of GNU LD (2.17.50, at
5008 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5009 // built with secure-PLT.
5010 bool UsePlt =
5011 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5013
5014 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5015 const TargetMachine &TM = Subtarget.getTargetMachine();
5016 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5017 MCSymbolXCOFF *S =
5019
5021 return DAG.getMCSymbol(S, PtrVT);
5022 };
5023
5025 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5026
5027 if (Subtarget.isAIXABI()) {
5028 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5030 }
5031 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5032 UsePlt ? PPCII::MO_PLT : 0);
5033 }
5034
5036 const char *SymName = S->getSymbol();
5037 if (Subtarget.isAIXABI()) {
5038 // If there exists a user-declared function whose name is the same as the
5039 // ExternalSymbol's, then we pick up the user-declared version.
5041 if (const Function *F =
5042 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5044
5045 // On AIX, direct function calls reference the symbol for the function's
5046 // entry point, which is named by prepending a "." before the function's
5047 // C-linkage name. A Qualname is returned here because an external
5048 // function entry point is a csect with XTY_ER property.
5049 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5050 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5051 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5052 (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5054 return Sec->getQualNameSymbol();
5055 };
5056
5057 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5058 }
5059 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5060 UsePlt ? PPCII::MO_PLT : 0);
5061 }
5062
5063 // No transformation needed.
5064 assert(Callee.getNode() && "What no callee?");
5065 return Callee;
5066}
5067
5069 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5070 "Expected a CALLSEQ_STARTSDNode.");
5071
5072 // The last operand is the chain, except when the node has glue. If the node
5073 // has glue, then the last operand is the glue, and the chain is the second
5074 // last operand.
5075 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5076 if (LastValue.getValueType() != MVT::Glue)
5077 return LastValue;
5078
5079 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5080}
5081
5082// Creates the node that moves a function's address into the count register
5083// to prepare for an indirect call instruction.
5085 SDValue &Glue, SDValue &Chain,
5086 const SDLoc &dl) {
5087 SDValue MTCTROps[] = {Chain, Callee, Glue};
5088 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5089 Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5090 makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5091 // The glue is the second value produced.
5092 Glue = Chain.getValue(1);
5093}
5094
5096 SDValue &Glue, SDValue &Chain,
5098 const CallBase *CB, const SDLoc &dl,
5099 bool hasNest,
5100 const PPCSubtarget &Subtarget) {
5101 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5102 // entry point, but to the function descriptor (the function entry point
5103 // address is part of the function descriptor though).
5104 // The function descriptor is a three doubleword structure with the
5105 // following fields: function entry point, TOC base address and
5106 // environment pointer.
5107 // Thus for a call through a function pointer, the following actions need
5108 // to be performed:
5109 // 1. Save the TOC of the caller in the TOC save area of its stack
5110 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5111 // 2. Load the address of the function entry point from the function
5112 // descriptor.
5113 // 3. Load the TOC of the callee from the function descriptor into r2.
5114 // 4. Load the environment pointer from the function descriptor into
5115 // r11.
5116 // 5. Branch to the function entry point address.
5117 // 6. On return of the callee, the TOC of the caller needs to be
5118 // restored (this is done in FinishCall()).
5119 //
5120 // The loads are scheduled at the beginning of the call sequence, and the
5121 // register copies are flagged together to ensure that no other
5122 // operations can be scheduled in between. E.g. without flagging the
5123 // copies together, a TOC access in the caller could be scheduled between
5124 // the assignment of the callee TOC and the branch to the callee, which leads
5125 // to incorrect code.
5126
5127 // Start by loading the function address from the descriptor.
5129 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5133
5134 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5135
5136 // Registers used in building the DAG.
5138 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5139
5140 // Offsets of descriptor members.
5141 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5142 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5143
5144 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5145 const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5146
5147 // One load for the function's entry point address.
5148 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5149 Alignment, MMOFlags);
5150
5151 // One for loading the TOC anchor for the module that contains the called
5152 // function.
5154 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5155 SDValue TOCPtr =
5156 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5157 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5158
5159 // One for loading the environment pointer.
5161 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5163 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5164 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5165
5166
5167 // Then copy the newly loaded TOC anchor to the TOC pointer.
5168 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5169 Chain = TOCVal.getValue(0);
5170 Glue = TOCVal.getValue(1);
5171
5172 // If the function call has an explicit 'nest' parameter, it takes the
5173 // place of the environment pointer.
5174 assert((!hasNest || !Subtarget.isAIXABI()) &&
5175 "Nest parameter is not supported on AIX.");
5176 if (!hasNest) {
5177 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5178 Chain = EnvVal.getValue(0);
5179 Glue = EnvVal.getValue(1);
5180 }
5181
5182 // The rest of the indirect call sequence is the same as the non-descriptor
5183 // DAG.
5184 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5185}
5186
5187static void
5190 SelectionDAG &DAG,
5191 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5192 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5193 const PPCSubtarget &Subtarget) {
5194 const bool IsPPC64 = Subtarget.isPPC64();
5195 // MVT for a general purpose register.
5196 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5197
5198 // First operand is always the chain.
5199 Ops.push_back(Chain);
5200
5201 // If it's a direct call pass the callee as the second operand.
5202 if (!CFlags.IsIndirect)
5203 Ops.push_back(Callee);
5204 else {
5205 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5206
5207 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5208 // on the stack (this would have been done in `LowerCall_64SVR4` or
5209 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5210 // represents both the indirect branch and a load that restores the TOC
5211 // pointer from the linkage area. The operand for the TOC restore is an add
5212 // of the TOC save offset to the stack pointer. This must be the second
5213 // operand: after the chain input but before any other variadic arguments.
5214 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5215 // saved or used.
5216 if (isTOCSaveRestoreRequired(Subtarget)) {
5218
5219 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5220 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5221 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5222 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5223 Ops.push_back(AddTOC);
5224 }
5225
5226 // Add the register used for the environment pointer.
5227 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5228 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5229 RegVT));
5230
5231
5232 // Add CTR register as callee so a bctr can be emitted later.
5233 if (CFlags.IsTailCall)
5234 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5235 }
5236
5237 // If this is a tail call add stack pointer delta.
5238 if (CFlags.IsTailCall)
5239 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5240
5241 // Add argument registers to the end of the list so that they are known live
5242 // into the call.
5243 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5244 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5245 RegsToPass[i].second.getValueType()));
5246
5247 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5248 // no way to mark dependencies as implicit here.
5249 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5250 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5251 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5252 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5253
5254 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5255 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5256 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5257
5258 // Add a register mask operand representing the call-preserved registers.
5259 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5260 const uint32_t *Mask =
5261 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5262 assert(Mask && "Missing call preserved mask for calling convention");
5263 Ops.push_back(DAG.getRegisterMask(Mask));
5264
5265 // If the glue is valid, it is the last operand.
5266 if (Glue.getNode())
5267 Ops.push_back(Glue);
5268}
5269
5270SDValue PPCTargetLowering::FinishCall(
5271 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5272 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5274 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5275 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5276
5277 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5278 Subtarget.isAIXABI())
5279 setUsesTOCBasePtr(DAG);
5280
5281 unsigned CallOpc =
5283 Subtarget, DAG.getTarget());
5284
5285 if (!CFlags.IsIndirect)
5286 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5287 else if (Subtarget.usesFunctionDescriptors())
5288 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5289 dl, CFlags.HasNest, Subtarget);
5290 else
5291 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5292
5293 // Build the operand list for the call instruction.
5295 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5296 SPDiff, Subtarget);
5297
5298 // Emit tail call.
5299 if (CFlags.IsTailCall) {
5300 // Indirect tail calls made with PC Relative calls do not have the same
5301 // constraints.
5302 assert(((Callee.getOpcode() == ISD::Register &&
5303 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5304 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5305 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5307 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5308 "Expecting a global address, external symbol, absolute value, "
5309 "register or an indirect tail call when PC Relative calls are "
5310 "used.");
5311 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5313 "Unexpected call opcode for a tail call.");
5315 return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5316 }
5317
5318 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5319 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5320 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5321 Glue = Chain.getValue(1);
5322
5323 // When performing tail call optimization the callee pops its arguments off
5324 // the stack. Account for this here so these bytes can be pushed back on in
5325 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5326 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5328 ? NumBytes
5329 : 0;
5330
5331 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5332 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5333 Glue, dl);
5334 Glue = Chain.getValue(1);
5335
5336 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5337 DAG, InVals);
5338}
5339
/// Top-level entry point for lowering an outgoing call described by \p CLI.
///
/// The routine first decides whether a requested tail call can really be
/// performed; because `isTailCall` is a reference to CLI.IsTailCall, clearing
/// it here is visible to the caller. A call site marked musttail that cannot
/// be tail-called is a fatal error. The call properties are then packed into a
/// CallFlags value and the work is dispatched to the AIX, 64-bit SVR4 or
/// 32-bit SVR4 lowering routine, whose result is returned.
///
/// NOTE(review): `Outs` and `Ins` are used below but their declarations are
/// not visible in this listing (some source lines appear to be missing) --
/// confirm against the upstream file.
5340SDValue
5341PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5342 SmallVectorImpl<SDValue> &InVals) const {
5343 SelectionDAG &DAG = CLI.DAG;
5344 SDLoc &dl = CLI.DL;
5346 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5348 SDValue Chain = CLI.Chain;
5349 SDValue Callee = CLI.Callee;
5350 bool &isTailCall = CLI.IsTailCall;
5351 CallingConv::ID CallConv = CLI.CallConv;
5352 bool isVarArg = CLI.IsVarArg;
5353 bool isPatchPoint = CLI.IsPatchPoint;
5354 const CallBase *CB = CLI.CB;
5355
5356 if (isTailCall) {
     // With long calls enabled, a tail call is only kept when the call site is
     // musttail; otherwise eligibility is decided by the ABI-specific check.
5357 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5358 isTailCall = false;
5359 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5360 isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5361 Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5362 else
5363 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5364 Ins, DAG);
5365 if (isTailCall) {
5366 ++NumTailCalls;
     // NOTE(review): the statement controlled by this `if` is not visible in
     // this listing -- confirm against the upstream file.
5367 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5369
5370 // PC Relative calls no longer guarantee that the callee is a Global
5371 // Address Node. The callee could be an indirect tail call in which
5372 // case the SDValue for the callee could be a load (to load the address
5373 // of a function pointer) or it may be a register copy (to move the
5374 // address of the callee from a function parameter into a virtual
5375 // register). It may also be an ExternalSymbolSDNode (e.g. memcopy).
5376 assert((Subtarget.isUsingPCRelativeCalls() ||
5378 "Callee should be an llvm::Function object.");
5379
5380 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5381 << "\nTCO callee: ");
5382 LLVM_DEBUG(Callee.dump());
5383 }
5384 }
5385
     // A musttail call site that ended up not being a tail call cannot be
     // lowered correctly, so abort compilation.
5386 if (!isTailCall && CB && CB->isMustTailCall())
5387 report_fatal_error("failed to perform tail call elimination on a call "
5388 "site marked musttail");
5389
5390 // When long calls (i.e. indirect calls) are always used, calls are always
5391 // made via function pointer. If we have a function name, first translate it
5392 // into a pointer.
5393 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5394 !isTailCall)
5395 Callee = LowerGlobalAddress(Callee, DAG);
5396
     // Bundle the per-call properties. The indirect-call flag is computed after
     // the long-call rewrite of Callee above, and hasNest is only set on the
     // 64-bit ELF ABI when some outgoing argument carries the 'nest' flag.
5397 CallFlags CFlags(
5398 CallConv, isTailCall, isVarArg, isPatchPoint,
5399 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5400 // hasNest
5401 Subtarget.is64BitELFABI() &&
5402 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5403 CLI.NoMerge);
5404
     // Dispatch on ABI: AIX first, then SVR4 split by pointer width.
5405 if (Subtarget.isAIXABI())
5406 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5407 InVals, CB);
5408
5409 assert(Subtarget.isSVR4ABI());
5410 if (Subtarget.isPPC64())
5411 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5412 InVals, CB);
5413 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5414 InVals, CB);
5415}
5416
/// Lower an outgoing call for the 32-bit SVR4 ABI.
///
/// Outgoing arguments are assigned locations through CCInfo (CC_PPC32_SVR4 in
/// the non-vararg case) and by-value aggregates through CCByValInfo
/// (CC_PPC32_SVR4_ByVal). The routine then opens the call sequence, places
/// every argument either in a physical register or in a stack location, emits
/// the copy-to-register chain, sets or clears CR bit 6 for vararg calls, and
/// finishes through FinishCall, whose result is returned.
///
/// NOTE(review): several names used below (e.g. MF, ArgLocs, ByValArgLocs,
/// RegsToPass, MemOpChains, InFlag) have no visible declaration in this
/// listing; some source lines appear to be missing -- confirm against the
/// upstream file.
5417SDValue PPCTargetLowering::LowerCall_32SVR4(
5418 SDValue Chain, SDValue Callee, CallFlags CFlags,
5420 const SmallVectorImpl<SDValue> &OutVals,
5421 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5423 const CallBase *CB) const {
5424 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5425 // of the 32-bit SVR4 ABI stack frame layout.
5426
5427 const CallingConv::ID CallConv = CFlags.CallConv;
5428 const bool IsVarArg = CFlags.IsVarArg;
5429 const bool IsTailCall = CFlags.IsTailCall;
5430
5431 assert((CallConv == CallingConv::C ||
5432 CallConv == CallingConv::Cold ||
5433 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5434
5435 const Align PtrAlign(4);
5436
5438
5439 // Mark this function as potentially containing a function that contains a
5440 // tail call. As a consequence the frame pointer will be used for dynamicalloc
5441 // and restoring the caller's stack pointer in this function's epilog. This is
5442 // done because by tail calling the called function might overwrite the value
5443 // in this function's (MF) stack pointer stack slot 0(SP).
5444 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5445 CallConv == CallingConv::Fast)
5446 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5447
5448 // Count how many bytes are to be pushed on the stack, including the linkage
5449 // area, parameter list area and the part of the local variable space which
5450 // contains copies of aggregates which are passed by value.
5451
5452 // Assign locations to all of the outgoing arguments.
5454 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5455
5456 // Reserve space for the linkage area on the stack.
5457 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5458 PtrAlign);
5459 if (useSoftFloat())
5460 CCInfo.PreAnalyzeCallOperands(Outs);
5461
5462 if (IsVarArg) {
5463 // Handle fixed and variable vector arguments differently.
5464 // Fixed vector arguments go into registers as long as registers are
5465 // available. Variable vector arguments always go into memory.
5466 unsigned NumArgs = Outs.size();
5467
5468 for (unsigned i = 0; i != NumArgs; ++i) {
5469 MVT ArgVT = Outs[i].VT;
5470 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5471 bool Result;
5472
     // NOTE(review): the calls that assign `Result` in both branches are only
     // partially visible in this listing.
5473 if (Outs[i].IsFixed) {
5475 CCInfo);
5476 } else {
5478 ArgFlags, CCInfo);
5479 }
5480
     // A true Result is treated as "no location could be assigned": report the
     // offending operand in debug builds and then hit llvm_unreachable.
5481 if (Result) {
5482#ifndef NDEBUG
5483 errs() << "Call operand #" << i << " has unhandled type "
5484 << EVT(ArgVT).getEVTString() << "\n";
5485#endif
5486 llvm_unreachable(nullptr);
5487 }
5488 }
5489 } else {
5490 // All arguments are treated the same.
5491 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5492 }
5493 CCInfo.clearWasPPCF128();
5494
5495 // Assign locations to all of the outgoing aggregate by value arguments.
5497 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5498
5499 // Reserve stack space for the allocations in CCInfo.
5500 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5501
5502 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5503
5504 // Size of the linkage area, parameter list area and the part of the local
5505 // space variable where copies of aggregates which are passed by value are
5506 // stored.
5507 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5508
5509 // Calculate by how many bytes the stack has to be adjusted in case of tail
5510 // call optimization.
5511 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5512
5513 // Adjust the stack pointer for the new arguments...
5514 // These operations are automatically eliminated by the prolog/epilog pass
5515 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5516 SDValue CallSeqStart = Chain;
5517
5518 // Load the return address and frame pointer so it can be moved somewhere else
5519 // later.
5520 SDValue LROp, FPOp;
5521 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5522
5523 // Set up a copy of the stack pointer for use loading and storing any
5524 // arguments that may not fit in the registers available for argument
5525 // passing.
5526 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5527
5531
     // Becomes true once any argument assigned to a register has a
     // floating-point location type; it selects CR6SET vs. CR6UNSET below.
5532 bool seenFloatArg = false;
5533 // Walk the register/memloc assignments, inserting copies/loads.
5534 // i - Tracks the index into the list of registers allocated for the call
5535 // RealArgIdx - Tracks the index into the list of actual function arguments
5536 // j - Tracks the index into the list of byval arguments
5537 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5538 i != e;
5539 ++i, ++RealArgIdx) {
5540 CCValAssign &VA = ArgLocs[i];
5541 SDValue Arg = OutVals[RealArgIdx];
5542 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5543
5544 if (Flags.isByVal()) {
5545 // Argument is an aggregate which is passed by value, thus we need to
5546 // create a copy of it in the local variable space of the current stack
5547 // frame (which is the stack frame of the caller) and pass the address of
5548 // this copy to the callee.
5549 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5551 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5552
5553 // Memory reserved in the local variable space of the caller's stack frame.
5554 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5555
5558 StackPtr, PtrOff);
5559
5560 // Create a copy of the argument in the local area of the current
5561 // stack frame.
5564 CallSeqStart.getNode()->getOperand(0),
5565 Flags, DAG, dl);
5566
5567 // This must go outside the CALLSEQ_START..END.
5569 SDLoc(MemcpyCall));
     // Redirect every user of the old CALLSEQ_START to the new one, and
     // continue the chain from the new node.
5570 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5571 NewCallSeqStart.getNode());
5572 Chain = CallSeqStart = NewCallSeqStart;
5573
5574 // Pass the address of the aggregate copy on the stack either in a
5575 // physical register or in the parameter list area of the current stack
5576 // frame to the callee.
5577 Arg = PtrOff;
5578 }
5579
5580 // When useCRBits() is true, there can be i1 arguments.
5581 // It is because getRegisterType(MVT::i1) => MVT::i1,
5582 // and for other integer types getRegisterType() => MVT::i32.
5583 // Extend i1 and ensure callee will get i32.
5584 if (Arg.getValueType() == MVT::i1)
5585 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5586 dl, MVT::i32, Arg);
5587
5588 if (VA.isRegLoc()) {
5589 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5590 // Put argument in a physical register.
     // With SPE an f64 occupies two register locations: the second half uses
     // ArgLocs[++i], which also advances the outer loop index past it.
5591 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5592 bool IsLE = Subtarget.isLittleEndian();
5594 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5595 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5597 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5598 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5599 SVal.getValue(0)));
5600 } else
5601 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5602 } else {
5603 // Put argument in the parameter list area of the current stack frame.
5604 assert(VA.isMemLoc());
5605 unsigned LocMemOffset = VA.getLocMemOffset();
5606
     // Non-tail calls store the argument right away; for tail calls only the
     // location is recorded here.
5607 if (!IsTailCall) {
5610 StackPtr, PtrOff);
5611
5612 MemOpChains.push_back(
5613 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5614 } else {
5615 // Calculate and remember argument location.
5618 }
5619 }
5620 }
5621
5622 if (!MemOpChains.empty())
5624
5625 // Build a sequence of copy-to-reg nodes chained together with token chain
5626 // and flag operands which copy the outgoing args into the appropriate regs.
5628 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5629 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5630 RegsToPass[i].second, InFlag);
5631 InFlag = Chain.getValue(1);
5632 }
5633
5634 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5635 // registers.
5636 if (IsVarArg) {
5638 SDValue Ops[] = { Chain, InFlag };
5639
     // InFlag is only passed as an operand when it holds a node, i.e. when at
     // least one copy-to-reg was emitted above.
5640 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5641 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5642
5643 InFlag = Chain.getValue(1);
5644 }
5645
5646 if (IsTailCall)
5647 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5649
5650 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5651 Callee, SPDiff, NumBytes, Ins, InVals, CB);
5652}
5653
5654// Copy an argument into memory, being careful to do this outside the
5655// call sequence for the call to which the argument belongs.
//
// The copy is chained on operand 0 of the given CALLSEQ_START, i.e. on the
// chain that feeds the call sequence, and all uses of the old CALLSEQ_START
// are redirected to a replacement node, which is returned.
//
// NOTE(review): part of the signature and the definitions of MemcpyCall and
// NewCallSeqStart are not visible in this listing -- confirm against the
// upstream file.
5656SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5658 SelectionDAG &DAG, const SDLoc &dl) const {
5660 CallSeqStart.getNode()->getOperand(0),
5661 Flags, DAG, dl);
5662 // The MEMCPY must go outside the CALLSEQ_START..END.
     // Operand 1 of CALLSEQ_START is read as a constant and kept as the frame
     // size.
5663 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5665 SDLoc(MemcpyCall));
5666 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5667 NewCallSeqStart.getNode());
5668 return NewCallSeqStart;
5669}
5670
/// Lower an outgoing call for the 64-bit SVR4 ABI (both ELFv1 and ELFv2).
///
/// The routine works in three phases:
///  1. Size the outgoing stack area: the linkage area plus, when required,
///     the parameter save area (HasParameterArea).
///  2. Walk the outgoing arguments and place each one in GPRs (X3-X10), FPRs,
///     vector registers (V2-V13) and/or a stack slot, depending on its type,
///     on whether the call is vararg, and on whether the fast calling
///     convention is in use.
///  3. For indirect calls, save the TOC pointer when required and (on ELFv2,
///     non-patchpoint) pass the callee address in X12; then emit the
///     copy-to-register chain and finish through FinishCall.
///
/// NOTE(review): several names used below (e.g. Outs, MF, PtrVT, FPR,
/// RegsToPass, MemOpChains, TailCallArguments, InFlag) have no visible
/// declaration in this listing; some source lines appear to be missing --
/// confirm against the upstream file.
5671SDValue PPCTargetLowering::LowerCall_64SVR4(
5672 SDValue Chain, SDValue Callee, CallFlags CFlags,
5674 const SmallVectorImpl<SDValue> &OutVals,
5675 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5677 const CallBase *CB) const {
5678 bool isELFv2ABI = Subtarget.isELFv2ABI();
5679 bool isLittleEndian = Subtarget.isLittleEndian();
5680 unsigned NumOps = Outs.size();
5681 bool IsSibCall = false;
5682 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5683
5685 unsigned PtrByteSize = 8;
5686
5688
     // A tail call without GuaranteedTailCallOpt is treated as a sibling call:
     // no CALLSEQ_START is emitted and SPDiff stays 0 (see below).
5689 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5690 IsSibCall = true;
5691
5692 // Mark this function as potentially containing a function that contains a
5693 // tail call. As a consequence the frame pointer will be used for dynamicalloc
5694 // and restoring the caller's stack pointer in this function's epilog. This is
5695 // done because by tail calling the called function might overwrite the value
5696 // in this function's (MF) stack pointer stack slot 0(SP).
5697 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5698 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5699
5700 assert(!(IsFastCall && CFlags.IsVarArg) &&
5701 "fastcc not supported on varargs functions");
5702
5703 // Count how many bytes are to be pushed on the stack, including the linkage
5704 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5705 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5706 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5707 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5708 unsigned NumBytes = LinkageSize;
5709 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5710
5711 static const MCPhysReg GPR[] = {
5712 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5713 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5714 };
5715 static const MCPhysReg VR[] = {
5716 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5717 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5718 };
5719
     // With soft-float no FPRs are available for argument passing.
5720 const unsigned NumGPRs = array_lengthof(GPR);
5721 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5722 const unsigned NumVRs = array_lengthof(VR);
5723
5724 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5725 // can be passed to the callee in registers.
5726 // For the fast calling convention, there is another check below.
5727 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5728 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5729 if (!HasParameterArea) {
5730 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5731 unsigned AvailableFPRs = NumFPRs;
5732 unsigned AvailableVRs = NumVRs;
5733 unsigned NumBytesTmp = NumBytes;
5734 for (unsigned i = 0; i != NumOps; ++i) {
5735 if (Outs[i].Flags.isNest()) continue;
5736 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5737 PtrByteSize, LinkageSize, ParamAreaSize,
5739 HasParameterArea = true;
5740 }
5741 }
5742
5743 // When using the fast calling convention, we don't provide backing for
5744 // arguments that will be in registers.
5745 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5746
5747 // Avoid allocating parameter area for fastcc functions if all the arguments
5748 // can be passed in the registers.
5749 if (IsFastCall)
5750 HasParameterArea = false;
5751
5752 // Add up all the space actually used.
5753 for (unsigned i = 0; i != NumOps; ++i) {
5754 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5755 EVT ArgVT = Outs[i].VT;
5756 EVT OrigVT = Outs[i].ArgVT;
5757
5758 if (Flags.isNest())
5759 continue;
5760
     // For fastcc, an argument that still fits in a register of its class is
     // skipped (`continue`) and contributes no stack bytes; only arguments
     // that overflow their register class fall through to the size accounting
     // below and force a parameter area.
5761 if (IsFastCall) {
5762 if (Flags.isByVal()) {
5763 NumGPRsUsed += (Flags.getByValSize()+7)/8;
5764 if (NumGPRsUsed > NumGPRs)
5765 HasParameterArea = true;
5766 } else {
5767 switch (ArgVT.getSimpleVT().SimpleTy) {
5768 default: llvm_unreachable("Unexpected ValueType for argument!");
5769 case MVT::i1:
5770 case MVT::i32:
5771 case MVT::i64:
5772 if (++NumGPRsUsed <= NumGPRs)
5773 continue;
5774 break;
5775 case MVT::v4i32:
5776 case MVT::v8i16:
5777 case MVT::v16i8:
5778 case MVT::v2f64:
5779 case MVT::v2i64:
5780 case MVT::v1i128:
5781 case MVT::f128:
5782 if (++NumVRsUsed <= NumVRs)
5783 continue;
5784 break;
5785 case MVT::v4f32:
5786 if (++NumVRsUsed <= NumVRs)
5787 continue;
5788 break;
5789 case MVT::f32:
5790 case MVT::f64:
5791 if (++NumFPRsUsed <= NumFPRs)
5792 continue;
5793 break;
5794 }
5795 HasParameterArea = true;
5796 }
5797 }
5798
5799 /* Respect alignment of argument on the stack. */
5800 auto Alignement =
5802 NumBytes = alignTo(NumBytes, Alignement);
5803
5804 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
     // After the last element of a consecutive-register group, round the
     // running size up to a multiple of PtrByteSize.
5805 if (Flags.isInConsecutiveRegsLast())
5806 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5807 }
5808
5809 unsigned NumBytesActuallyUsed = NumBytes;
5810
5811 // In the old ELFv1 ABI,
5812 // the prolog code of the callee may store up to 8 GPR argument registers to
5813 // the stack, allowing va_start to index over them in memory if it's varargs.
5814 // Because we cannot tell if this is needed on the caller side, we have to
5815 // conservatively assume that it is needed. As such, make sure we have at
5816 // least enough stack space for the caller to store the 8 GPRs.
5817 // In the ELFv2 ABI, we allocate the parameter area iff a callee
5818 // really requires memory operands, e.g. a vararg function.
5819 if (HasParameterArea)
5820 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5821 else
5822 NumBytes = LinkageSize;
5823
5824 // Tail call needs the stack to be aligned.
5825 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5826 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5827
5828 int SPDiff = 0;
5829
5830 // Calculate by how many bytes the stack has to be adjusted in case of tail
5831 // call optimization.
5832 if (!IsSibCall)
5833 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
5834
5835 // To protect arguments on the stack from being clobbered in a tail call,
5836 // force all the loads to happen before doing any other lowering.
5837 if (CFlags.IsTailCall)
5838 Chain = DAG.getStackArgumentTokenFactor(Chain);
5839
5840 // Adjust the stack pointer for the new arguments...
5841 // These operations are automatically eliminated by the prolog/epilog pass
5842 if (!IsSibCall)
5843 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5844 SDValue CallSeqStart = Chain;
5845
5846 // Load the return address and frame pointer so it can be moved somewhere else
5847 // later.
5848 SDValue LROp, FPOp;
5849 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5850
5851 // Set up a copy of the stack pointer for use loading and storing any
5852 // arguments that may not fit in the registers available for argument
5853 // passing.
5854 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5855
5856 // Figure out which arguments are going to go in registers, and which in
5857 // memory. Also, if this is a vararg function, floating point operations
5858 // must be stored to our stack, and loaded into integer regs as well, if
5859 // any integer regs are available for argument passing.
5860 unsigned ArgOffset = LinkageSize;
5861
5864
5866 for (unsigned i = 0; i != NumOps; ++i) {
5867 SDValue Arg = OutVals[i];
5868 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5869 EVT ArgVT = Outs[i].VT;
5870 EVT OrigVT = Outs[i].ArgVT;
5871
5872 // PtrOff will be used to store the current argument to the stack if a
5873 // register cannot be found for it.
5875
5876 // We re-align the argument offset for each argument, except when using the
5877 // fast calling convention, when we need to make sure we do that only when
5878 // we'll actually use a stack slot.
5879 auto ComputePtrOff = [&]() {
5880 /* Respect alignment of argument on the stack. */
5881 auto Alignment =
5883 ArgOffset = alignTo(ArgOffset, Alignment);
5884
5885 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5886
5887 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5888 };
5889
     // Outside fastcc the GPR index is derived from the argument's offset in
     // the parameter area (one GPR per PtrByteSize bytes), capped at NumGPRs.
5890 if (!IsFastCall) {
5891 ComputePtrOff();
5892
5893 /* Compute GPR index associated with argument offset. */
5894 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5895 GPR_idx = std::min(GPR_idx, NumGPRs);
5896 }
5897
5898 // Promote integers to 64-bit values.
5899 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5900 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5901 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5902 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5903 }
5904
5905 // FIXME memcpy is used way more than necessary. Correctness first.
5906 // Note: "by value" is code for passing a structure by value, not
5907 // basic types.
5908 if (Flags.isByVal()) {
5909 // Note: Size includes alignment padding, so
5910 // struct x { short a; char b; }
5911 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5912 // These are the proper values we need for right-justifying the
5913 // aggregate in a parameter register.
5914 unsigned Size = Flags.getByValSize();
5915
5916 // An empty aggregate parameter takes up no storage and no
5917 // registers.
5918 if (Size == 0)
5919 continue;
5920
5921 if (IsFastCall)
5922 ComputePtrOff();
5923
5924 // All aggregates smaller than 8 bytes must be passed right-justified.
     // Sizes 1, 2 and 4 map directly onto an extending load of the matching
     // integer type when a GPR is still free.
5925 if (Size==1 || Size==2 || Size==4) {
5926 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5927 if (GPR_idx != NumGPRs) {
5928 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5929 MachinePointerInfo(), VT);
5930 MemOpChains.push_back(Load.getValue(1));
5931 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5932
5934 continue;
5935 }
5936 }
5937
     // No GPR left for a small aggregate: copy it into its stack slot; on
     // big-endian targets the destination address is adjusted by a constant
     // first.
5938 if (GPR_idx == NumGPRs && Size < 8) {
5940 if (!isLittleEndian) {
5942 PtrOff.getValueType());
5943 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5944 }
5945 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5947 Flags, DAG, dl);
5949 continue;
5950 }
5951 // Copy entire object into memory. There are cases where gcc-generated
5952 // code assumes it is there, even if it could be put entirely into
5953 // registers. (This is not what the doc says.)
5954
5955 // FIXME: The above statement is likely due to a misunderstanding of the
5956 // documents. All arguments must be copied into the parameter area BY
5957 // THE CALLEE in the event that the callee takes the address of any
5958 // formal argument. That has not yet been implemented. However, it is
5959 // reasonable to use the stack area as a staging area for the register
5960 // load.
5961
5962 // Skip this for small aggregates, as we will use the same slot for a
5963 // right-justified copy, below.
5964 if (Size >= 8)
5965 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5967 Flags, DAG, dl);
5968
5969 // When a register is available, pass a small aggregate right-justified.
5970 if (Size < 8 && GPR_idx != NumGPRs) {
5971 // The easiest way to get this right-justified in a register
5972 // is to copy the structure into the rightmost portion of a
5973 // local variable slot, then load the whole slot into the
5974 // register.
5975 // FIXME: The memcpy seems to produce pretty awful code for
5976 // small aggregates, particularly for packed ones.
5977 // FIXME: It would be preferable to use the slot in the
5978 // parameter save area instead of a new local variable.
5980 if (!isLittleEndian) {
5981 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5982 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5983 }
5984 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5986 Flags, DAG, dl);
5987
5988 // Load the slot into the register.
5989 SDValue Load =
5990 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5991 MemOpChains.push_back(Load.getValue(1));
5992 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5993
5994 // Done with this argument.
5996 continue;
5997 }
5998
5999 // For aggregates larger than PtrByteSize, copy the pieces of the
6000 // object that fit into registers from the parameter save area.
6001 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6002 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6003 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6004 if (GPR_idx != NumGPRs) {
6005 SDValue Load =
6006 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6007 MemOpChains.push_back(Load.getValue(1));
6008 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6010 } else {
6012 break;
6013 }
6014 }
6015 continue;
6016 }
6017
6018 switch (Arg.getSimpleValueType().SimpleTy) {
6019 default: llvm_unreachable("Unexpected ValueType for argument!");
6020 case MVT::i1:
6021 case MVT::i32:
6022 case MVT::i64:
6023 if (Flags.isNest()) {
6024 // The 'nest' parameter, if any, is passed in R11.
6025 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6026 break;
6027 }
6028
6029 // These can be scalar arguments or elements of an integer array type
6030 // passed directly. Clang may use those instead of "byval" aggregate
6031 // types to avoid forcing arguments to memory unnecessarily.
6032 if (GPR_idx != NumGPRs) {
6033 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6034 } else {
6035 if (IsFastCall)
6036 ComputePtrOff();
6037
6039 "Parameter area must exist to pass an argument in memory.");
6040 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6041 true, CFlags.IsTailCall, false, MemOpChains,
6042 TailCallArguments, dl);
6043 if (IsFastCall)
6045 }
6046 if (!IsFastCall)
6048 break;
6049 case MVT::f32:
6050 case MVT::f64: {
6051 // These can be scalar arguments or elements of a float array type
6052 // passed directly. The latter are used to implement ELFv2 homogenous
6053 // float aggregates.
6054
6055 // Named arguments go into FPRs first, and once they overflow, the
6056 // remaining arguments go into GPRs and then the parameter save area.
6057 // Unnamed arguments for vararg functions always go to GPRs and
6058 // then the parameter save area. For now, put all arguments to vararg
6059 // routines always in both locations (FPR *and* GPR or stack slot).
6060 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6061 bool NeededLoad = false;
6062
6063 // First load the argument into the next available FPR.
6064 if (FPR_idx != NumFPRs)
6065 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6066
6067 // Next, load the argument into GPR or stack slot if needed.
6068 if (!NeedGPROrStack)
6069 ;
6070 else if (GPR_idx != NumGPRs && !IsFastCall) {
6071 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6072 // once we support fp <-> gpr moves.
6073
6074 // In the non-vararg case, this can only ever happen in the
6075 // presence of f32 array types, since otherwise we never run
6076 // out of FPRs before running out of GPRs.
6078
6079 // Double values are always passed in a single GPR.
6080 if (Arg.getValueType() != MVT::f32) {
6081 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6082
6083 // Non-array float values are extended and passed in a GPR.
6084 } else if (!Flags.isInConsecutiveRegs()) {
6085 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6087
6088 // If we have an array of floats, we collect every odd element
6089 // together with its predecessor into one GPR.
6090 } else if (ArgOffset % PtrByteSize != 0) {
6091 SDValue Lo, Hi;
6092 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6093 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6094 if (!isLittleEndian)
6095 std::swap(Lo, Hi);
6096 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6097
6098 // The final element, if even, goes into the first half of a GPR.
6099 } else if (Flags.isInConsecutiveRegsLast()) {
6100 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6102 if (!isLittleEndian)
6104 DAG.getConstant(32, dl, MVT::i32));
6105
6106 // Non-final even elements are skipped; they will be handled
6107 // together with the subsequent argument on the next go-around.
6108 } else
6109 ArgVal = SDValue();
6110
     // An empty ArgVal means "nothing to pass for this element yet", so no
     // GPR is consumed.
6111 if (ArgVal.getNode())
6112 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6113 } else {
6114 if (IsFastCall)
6115 ComputePtrOff();
6116
6117 // Single-precision floating-point values are mapped to the
6118 // second (rightmost) word of the stack doubleword.
6119 if (Arg.getValueType() == MVT::f32 &&
6120 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6121 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6123 }
6124
6126 "Parameter area must exist to pass an argument in memory.");
6127 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6128 true, CFlags.IsTailCall, false, MemOpChains,
6129 TailCallArguments, dl);
6130
6131 NeededLoad = true;
6132 }
6133 // When passing an array of floats, the array occupies consecutive
6134 // space in the argument area; only round up to the next doubleword
6135 // at the end of the array. Otherwise, each float takes 8 bytes.
6136 if (!IsFastCall || NeededLoad) {
6137 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6138 Flags.isInConsecutiveRegs()) ? 4 : 8;
6139 if (Flags.isInConsecutiveRegsLast())
6141 }
6142 break;
6143 }
6144 case MVT::v4f32:
6145 case MVT::v4i32:
6146 case MVT::v8i16:
6147 case MVT::v16i8:
6148 case MVT::v2f64:
6149 case MVT::v2i64:
6150 case MVT::v1i128:
6151 case MVT::f128:
6152 // These can be scalar arguments or elements of a vector array type
6153 // passed directly. The latter are used to implement ELFv2 homogenous
6154 // vector aggregates.
6155
6156 // For a varargs call, named arguments go into VRs or on the stack as
6157 // usual; unnamed arguments always go to the stack or the corresponding
6158 // GPRs when within range. For now, we always put the value in both
6159 // locations (or even all three).
6160 if (CFlags.IsVarArg) {
6162 "Parameter area must exist if we have a varargs call.");
6163 // We could elide this store in the case where the object fits
6164 // entirely in R registers. Maybe later.
6165 SDValue Store =
6166 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6167 MemOpChains.push_back(Store);
6168 if (VR_idx != NumVRs) {
6169 SDValue Load =
6170 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6171 MemOpChains.push_back(Load.getValue(1));
6172 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6173 }
6174 ArgOffset += 16;
     // NOTE: this inner `i` shadows the outer argument index `i`; inside this
     // loop it is a byte offset (0, 8) into the 16-byte slot just stored.
6175 for (unsigned i=0; i<16; i+=PtrByteSize) {
6176 if (GPR_idx == NumGPRs)
6177 break;
6178 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6179 DAG.getConstant(i, dl, PtrVT));
6180 SDValue Load =
6181 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6182 MemOpChains.push_back(Load.getValue(1));
6183 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6184 }
6185 break;
6186 }
6187
6188 // Non-varargs Altivec params go into VRs or on the stack.
6189 if (VR_idx != NumVRs) {
6190 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6191 } else {
6192 if (IsFastCall)
6193 ComputePtrOff();
6194
6196 "Parameter area must exist to pass an argument in memory.");
6197 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6198 true, CFlags.IsTailCall, true, MemOpChains,
6199 TailCallArguments, dl);
     // fastcc only advances ArgOffset when a stack slot was really used.
6200 if (IsFastCall)
6201 ArgOffset += 16;
6202 }
6203
6204 if (!IsFastCall)
6205 ArgOffset += 16;
6206 break;
6207 }
6208 }
6209
6211 "mismatch in size of parameter area");
6213
6214 if (!MemOpChains.empty())
6216
6217 // Check if this is an indirect call (MTCTR/BCTRL).
6218 // See prepareDescriptorIndirectCall and buildCallOperands for more
6219 // information about calls through function pointers in the 64-bit SVR4 ABI.
6220 if (CFlags.IsIndirect) {
6221 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6222 // caller in the TOC save area.
6223 if (isTOCSaveRestoreRequired(Subtarget)) {
6224 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6225 // Load r2 into a virtual register and store it to the TOC save area.
6226 setUsesTOCBasePtr(DAG);
6227 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6228 // TOC save area offset.
6229 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6230 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6231 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6232 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6234 DAG.getMachineFunction(), TOCSaveOffset));
6235 }
6236 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6237 // This does not mean the MTCTR instruction must use R12; it's easier
6238 // to model this as an extra parameter, so do that.
6239 if (isELFv2ABI && !CFlags.IsPatchPoint)
6240 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6241 }
6242
6243 // Build a sequence of copy-to-reg nodes chained together with token chain
6244 // and flag operands which copy the outgoing args into the appropriate regs.
6246 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6247 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6248 RegsToPass[i].second, InFlag);
6249 InFlag = Chain.getValue(1);
6250 }
6251
     // Sibling calls skip the tail-call preparation step.
6252 if (CFlags.IsTailCall && !IsSibCall)
6253 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6255
6256 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6257 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6258}
6259
// Argument-assignment callback for the AIX calling convention; it is the
// function handed to CCState::AnalyzeFormalArguments and
// CCState::AnalyzeCallOperands by the AIX lowering code in this file.
// For value number ValNo of type ValVT it records in State the register(s)
// and/or parameter-save-area slot the value occupies. Every handled path
// returns false; unsupported inputs abort through report_fatal_error.
6260static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6262 CCState &State) {
6263
6264 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6265 State.getMachineFunction().getSubtarget());
6266 const bool IsPPC64 = Subtarget.isPPC64();
6267 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6268 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6269
// Vector values are only accepted when the extended Altivec ABI option is on.
6270 if (ValVT.isVector() && !State.getMachineFunction()
6271 .getTarget()
6272 .Options.EnableAIXExtendedAltivecABI)
6273 report_fatal_error("the default Altivec AIX ABI is not yet supported");
6274
6275 if (ValVT == MVT::f128)
6276 report_fatal_error("f128 is unimplemented on AIX.");
6277
6278 if (ArgFlags.isNest())
6279 report_fatal_error("Nest arguments are unimplemented.");
6280
6281 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6282 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6283 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6284 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6285 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6286 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6287
6288 static const MCPhysReg VR[] = {// Vector registers.
6289 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6290 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6291 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6292
6293 if (ArgFlags.isByVal()) {
6294 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6295 report_fatal_error("Pass-by-value arguments with alignment greater than "
6296 "register width are not supported.");
6297
6298 const unsigned ByValSize = ArgFlags.getByValSize();
6299
6300 // An empty aggregate parameter takes up no storage and no registers,
6301 // but needs a MemLoc for a stack slot for the formal arguments side.
6302 if (ByValSize == 0) {
6304 State.getNextStackOffset(), RegVT,
6305 LocInfo));
6306 return false;
6307 }
6308
// Reserve stack for the whole aggregate, rounded up to the pointer size,
// then walk that range one pointer-sized chunk at a time, giving each chunk
// a GPR while argument registers remain.
6309 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6310 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6311 for (const unsigned E = Offset + StackSize; Offset < E;
6312 Offset += PtrAlign.value()) {
6313 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6314 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6315 else {
6318 LocInfo));
6319 break;
6320 }
6321 }
6322 return false;
6323 }
6324
6325 // Arguments always reserve parameter save area.
6326 switch (ValVT.SimpleTy) {
6327 default:
6328 report_fatal_error("Unhandled value type for argument.");
6329 case MVT::i64:
6330 // i64 arguments should have been split to i32 for PPC32.
6331 assert(IsPPC64 && "PPC32 should have split i64 values.");
6333 case MVT::i1:
6334 case MVT::i32: {
6335 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6336 // AIX integer arguments are always passed in register width.
6337 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6338 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6340 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6341 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6342 else
6343 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6344
6345 return false;
6346 }
6347 case MVT::f32:
6348 case MVT::f64: {
6349 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6350 const unsigned StoreSize = LocVT.getStoreSize();
6351 // Floats are always 4-byte aligned in the PSA on AIX.
6352 // This includes f64 in 64-bit mode for ABI compatibility.
6353 const unsigned Offset =
6354 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6355 unsigned FReg = State.AllocateReg(FPR);
6356 if (FReg)
6357 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6358
6359 // Reserve and initialize GPRs or initialize the PSA as required.
6360 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6361 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6362 assert(FReg && "An FPR should be available when a GPR is reserved.");
6363 if (State.isVarArg()) {
6364 // Successfully reserved GPRs are only initialized for vararg calls.
6365 // Custom handling is required for:
6366 // f64 in PPC32 needs to be split into 2 GPRs.
6367 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6368 State.addLoc(
6369 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6370 }
6371 } else {
6372 // If there are insufficient GPRs, the PSA needs to be initialized.
6373 // Initialization occurs even if an FPR was initialized for
6374 // compatibility with the AIX XL compiler. The full memory for the
6375 // argument will be initialized even if a prior word is saved in GPR.
6376 // A custom memLoc is used when the argument also passes in FPR so
6377 // that the callee handling can skip over it easily.
6378 State.addLoc(
6379 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6380 LocInfo)
6381 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6382 break;
6383 }
6384 }
6385
6386 return false;
6387 }
6388 case MVT::v4f32:
6389 case MVT::v4i32:
6390 case MVT::v8i16:
6391 case MVT::v16i8:
6392 case MVT::v2i64:
6393 case MVT::v2f64:
6394 case MVT::v1i128: {
6395 if (State.isVarArg())
6397 "variadic arguments for vector types are unimplemented for AIX");
6398
6399 if (unsigned VReg = State.AllocateReg(VR))
6400 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6401 else {
6403 "passing vector parameters to the stack is unimplemented for AIX");
6404 }
6405 return false;
6406 }
6407 }
6408 return true;
6409}
6410
// Returns the register class used for a formal argument of simple value type
// SVT: G8RC or GPRC (selected by IsPPC64) for i1/i32/i64, F4RC for f32, F8RC
// for f64 and VRRC for the listed vector types. Any other type is a fatal
// error.
6412 bool IsPPC64) {
6413 assert((IsPPC64 || SVT != MVT::i64) &&
6414 "i64 should have been split for 32-bit codegen.");
6415
6416 switch (SVT) {
6417 default:
6418 report_fatal_error("Unexpected value type for formal argument");
6419 case MVT::i1:
6420 case MVT::i32:
6421 case MVT::i64:
6422 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6423 case MVT::f32:
6424 return &PPC::F4RCRegClass;
6425 case MVT::f64:
6426 return &PPC::F8RCRegClass;
6427 case MVT::v4f32:
6428 case MVT::v4i32:
6429 case MVT::v8i16:
6430 case MVT::v16i8:
6431 case MVT::v2i64:
6432 case MVT::v2f64:
6433 case MVT::v1i128:
6434 return &PPC::VRRCRegClass;
6435 }
6436}
6437
// Narrows ArgValue, which has the wider integer type LocVT, back to the
// integer type ValVT. When Flags marks the argument as sign- or
// zero-extended, an AssertSext / AssertZext node carrying ValVT is inserted
// first; the returned node is an ISD::TRUNCATE to ValVT.
6439 SelectionDAG &DAG, SDValue ArgValue,
6440 MVT LocVT, const SDLoc &dl) {
// Only strictly narrowing integer-to-integer conversions are expected here.
6441 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6442 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6443
6444 if (Flags.isSExt())
6445 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6446 DAG.getValueType(ValVT));
6447 else if (Flags.isZExt())
6448 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6449 DAG.getValueType(ValVT));
6450
6451 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6452}
6453
6454static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6455 const unsigned LASize = FL->getLinkageSize();
6456
6457 if (PPC::GPRCRegClass.contains(Reg)) {
6458 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6459 "Reg must be a valid argument register!");
6460 return LASize + 4 * (Reg - PPC::R3);
6461 }
6462
6463 if (PPC::G8RCRegClass.contains(Reg)) {
6464 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6465 "Reg must be a valid argument register!");
6466 return LASize + 8 * (Reg - PPC::X3);
6467 }
6468
6469 llvm_unreachable("Only general purpose registers expected.");
6470}
6471
6472// AIX ABI Stack Frame Layout:
6473//
6474// Low Memory +--------------------------------------------+
6475// SP +---> | Back chain | ---+
6476// | +--------------------------------------------+ |
6477// | | Saved Condition Register | |
6478// | +--------------------------------------------+ |
6479// | | Saved Linkage Register | |
6480// | +--------------------------------------------+ | Linkage Area
6481// | | Reserved for compilers | |
6482// | +--------------------------------------------+ |
6483// | | Reserved for binders | |
6484// | +--------------------------------------------+ |
6485// | | Saved TOC pointer | ---+
6486// | +--------------------------------------------+
6487// | | Parameter save area |
6488// | +--------------------------------------------+
6489// | | Alloca space |
6490// | +--------------------------------------------+
6491// | | Local variable space |
6492// | +--------------------------------------------+
6493// | | Float/int conversion temporary |
6494// | +--------------------------------------------+
6495// | | Save area for AltiVec registers |
6496// | +--------------------------------------------+
6497// | | AltiVec alignment padding |
6498// | +--------------------------------------------+
6499// | | Save area for VRSAVE register |
6500// | +--------------------------------------------+
6501// | | Save area for General Purpose registers |
6502// | +--------------------------------------------+
6503// | | Save area for Floating Point registers |
6504// | +--------------------------------------------+
6505// +---- | Back chain |
6506// High Memory +--------------------------------------------+
6507//
6508// Specifications:
6509// AIX 7.2 Assembler Language Reference
6510// Subroutine linkage convention
6511
// Lowers the incoming (formal) arguments of a function for the AIX ABI.
// Argument locations are computed with CC_AIX after reserving the linkage
// area. Register arguments become live-ins copied into virtual registers,
// stack arguments are loaded from fixed frame objects, and by-value
// aggregates are exposed as frame indices, with their register-resident
// pieces stored back to the stack. The resulting values are appended to
// InVals. For variadic functions, argument GPRs starting at the first one
// not covered by the already-assigned stack offset are stored to the stack.
// Returns the chain, token-factored with any stores that were created.
6512SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6513 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6514 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6515 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6516
6517 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
6518 CallConv == CallingConv::Fast) &&
6519 "Unexpected calling convention!");
6520
6521 if (getTargetMachine().Options.GuaranteedTailCallOpt)
6522 report_fatal_error("Tail call support is unimplemented on AIX.");
6523
6524 if (useSoftFloat())
6525 report_fatal_error("Soft float support is unimplemented on AIX.");
6526
6527 const PPCSubtarget &Subtarget =
6528 static_cast<const PPCSubtarget &>(DAG.getSubtarget());
6529
6530 const bool IsPPC64 = Subtarget.isPPC64();
6531 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6532
6533 // Assign locations to all of the incoming arguments.
6536 MachineFrameInfo &MFI = MF.getFrameInfo();
6537 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
6538 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
6539
6540 const EVT PtrVT = getPointerTy(MF.getDataLayout());
6541 // Reserve space for the linkage area on the stack.
6542 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6543 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6544 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
6545
6547
// The index is advanced inside the body because a single argument may
// consume several consecutive entries of ArgLocs.
6548 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
6549 CCValAssign &VA = ArgLocs[I++];
6550 MVT LocVT = VA.getLocVT();
6551 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
6552 if (VA.isMemLoc() && VA.getValVT().isVector())
6554 "passing vector parameters to the stack is unimplemented for AIX");
6555
6556 // For compatibility with the AIX XL compiler, the float args in the
6557 // parameter save area are initialized even if the argument is available
6558 // in register. The caller is required to initialize both the register
6559 // and memory, however, the callee can choose to expect it in either.
6560 // The memloc is dismissed here because the argument is retrieved from
6561 // the register.
6562 if (VA.isMemLoc() && VA.needsCustom())
6563 continue;
6564
6565 if (VA.isRegLoc()) {
6566 if (VA.getValVT().isScalarInteger())
6568 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector())
6569 FuncInfo->appendParameterType(VA.getValVT().SimpleTy == MVT::f32
6572 }
6573
// A by-value argument whose first location is already in memory lives
// entirely on the stack: expose its slot as a fixed frame object. A
// zero-sized aggregate still gets a pointer-sized object.
6574 if (Flags.isByVal() && VA.isMemLoc()) {
6575 const unsigned Size =
6576 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
6577 PtrByteSize);
6578 const int FI = MF.getFrameInfo().CreateFixedObject(
6579 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
6580 /* IsAliased */ true);
6581 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6582 InVals.push_back(FIN);
6583
6584 continue;
6585 }
6586
6587 if (Flags.isByVal()) {
6588 assert(VA.isRegLoc() && "MemLocs should already be handled.");
6589
6590 const MCPhysReg ArgReg = VA.getLocReg();
6591 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
6592
6593 if (Flags.getNonZeroByValAlign() > PtrByteSize)
6594 report_fatal_error("Over aligned byvals not supported yet.");
6595
// The fixed object is placed at the stack offset that corresponds to the
// first register holding the aggregate.
6596 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
6597 const int FI = MF.getFrameInfo().CreateFixedObject(
6598 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
6599 /* IsAliased */ true);
6600 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6601 InVals.push_back(FIN);
6602
6603 // Add live ins for all the RegLocs for the same ByVal.
6604 const TargetRegisterClass *RegClass =
6605 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6606
6607 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
6608 unsigned Offset) {
6609 const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
6610 // Since the caller's side has left justified the aggregate in the
6611 // register, we can simply store the entire register into the stack
6612 // slot.
6613 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
6614 // The store to the fixedstack object is needed because accessing a
6615 // field of the ByVal will use a gep and load. Ideally we will optimize
6616 // to extracting the value from the register directly, and elide the
6617 // stores when the argument's address is not taken, but that will need to
6618 // be future work.
6619 SDValue Store = DAG.getStore(
6620 CopyFrom.getValue(1), dl, CopyFrom,
6623
6624 MemOps.push_back(Store);
6625 };
6626
6627 unsigned Offset = 0;
6628 HandleRegLoc(VA.getLocReg(), Offset);
6630 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
6631 Offset += PtrByteSize) {
6632 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
6633 "RegLocs should be for ByVal argument.");
6634
6635 const CCValAssign RL = ArgLocs[I++];
6638 }
6639
6640 if (Offset != StackSize) {
6641 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
6642 "Expected MemLoc for remaining bytes.");
6643 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
6644 // Consume the MemLoc. The InVal has already been emitted, so nothing
6645 // more needs to be done.
6646 ++I;
6647 }
6648
6649 continue;
6650 }
6651
6652 EVT ValVT = VA.getValVT();
6653 if (VA.isRegLoc() && !VA.needsCustom()) {
6655 unsigned VReg =
6656 MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
6657 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
// Integers narrower than the location type are truncated back to ValVT.
6658 if (ValVT.isScalarInteger() &&
6659 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
6660 ArgValue =
6661 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
6662 }
6663 InVals.push_back(ArgValue);
6664 continue;
6665 }
6666 if (VA.isMemLoc()) {
6667 const unsigned LocSize = LocVT.getStoreSize();
6668 const unsigned ValSize = ValVT.getStoreSize();
6669 assert((ValSize <= LocSize) &&
6670 "Object size is larger than size of MemLoc");
6671 int CurArgOffset = VA.getLocMemOffset();
6672 // Objects are right-justified because AIX is big-endian.
6673 if (LocSize > ValSize)
6674 CurArgOffset += LocSize - ValSize;
6675 // Potential tail calls could cause overwriting of argument stack slots.
6676 const bool IsImmutable =
6678 (CallConv == CallingConv::Fast));
6679 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
6680 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6681 SDValue ArgValue =
6682 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
6683 InVals.push_back(ArgValue);
6684 continue;
6685 }
6686 }
6687
6688 // On AIX a minimum of 8 words is saved to the parameter save area.
6689 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
6690 // Area that is at least reserved in the caller of this function.
6691 unsigned CallerReservedArea =
6692 std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
6693
6694 // Set the size that is at least reserved in caller of this function. Tail
6695 // call optimized function's reserved stack space needs to be aligned so
6696 // that taking the difference between two stack areas will result in an
6697 // aligned stack.
6701
6702 if (isVarArg) {
6703 FuncInfo->setVarArgsFrameIndex(
6704 MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
6706
6707 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6708 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6709
6710 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6711 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6712 const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
6713
6714 // The fixed integer arguments of a variadic function are stored to the
6715 // VarArgsFrameIndex on the stack so that they may be loaded by
6716 // dereferencing the result of va_next.
6717 for (unsigned GPRIndex =
6718 (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
6720
6721 const unsigned VReg =
6722 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
6723 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
6724
6725 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
6726 SDValue Store =
6727 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
6728 MemOps.push_back(Store);
6729 // Increment the address for the next argument to store.
6731 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
6732 }
6733 }
6734
// Merge all stores created above into the returned chain.
6735 if (!MemOps.empty())
6736 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
6737
6738 return Chain;
6739}
6740
// Lowers an outgoing call for the AIX ABI. Locations for the outgoing values
// are computed with CC_AIX after reserving the linkage area, and at least
// LinkageSize + 8 pointer-sized words of stack are reserved for the call.
// By-value aggregates are loaded into GPRs (left-justified in the last,
// partially filled register) and/or memcpy'd to the stack. Scalar arguments
// are extended as their location requires and then copied to registers or
// stored to the parameter save area. Vararg floating-point values are
// additionally passed in GPRs through the custom register locations. For
// indirect calls the TOC base register is saved to its stack slot. The call
// itself is emitted by FinishCall, whose result is returned.
6741SDValue PPCTargetLowering::LowerCall_AIX(
6742 SDValue Chain, SDValue Callee, CallFlags CFlags,
6744 const SmallVectorImpl<SDValue> &OutVals,
6745 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6747 const CallBase *CB) const {
6748 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
6749 // AIX ABI stack frame layout.
6750
6751 assert((CFlags.CallConv == CallingConv::C ||
6752 CFlags.CallConv == CallingConv::Cold ||
6753 CFlags.CallConv == CallingConv::Fast) &&
6754 "Unexpected calling convention!");
6755
6756 if (CFlags.IsPatchPoint)
6757 report_fatal_error("This call type is unimplemented on AIX.");
6758
6759 const PPCSubtarget& Subtarget =
6760 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
6761
6764 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
6765 *DAG.getContext());
6766
6767 // Reserve space for the linkage save area (LSA) on the stack.
6768 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
6769 // [SP][CR][LR][2 x reserved][TOC].
6770 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
6771 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6772 const bool IsPPC64 = Subtarget.isPPC64();
6773 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
6774 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6775 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6776 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
6777
6778 // The prolog code of the callee may store up to 8 GPR argument registers to
6779 // the stack, allowing va_start to index over them in memory if the callee
6780 // is variadic.
6781 // Because we cannot tell if this is needed on the caller side, we have to
6782 // conservatively assume that it is needed. As such, make sure we have at
6783 // least enough stack space for the caller to store the 8 GPRs.
6784 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
6785 const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
6786 CCInfo.getNextStackOffset());
6787
6788 // Adjust the stack pointer for the new arguments...
6789 // These operations are automatically eliminated by the prolog/epilog pass.
6790 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6791 SDValue CallSeqStart = Chain;
6792
6795
6796 // Set up a copy of the stack pointer for loading and storing any
6797 // arguments that may not fit in the registers available for argument
6798 // passing.
6799 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
6800 : DAG.getRegister(PPC::R1, MVT::i32);
6801
// The index is advanced inside the body because a single argument may
// consume several consecutive entries of ArgLocs.
6802 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
6803 const unsigned ValNo = ArgLocs[I].getValNo();
6804 SDValue Arg = OutVals[ValNo];
6805 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
6806
6807 if (Flags.isByVal()) {
6808 const unsigned ByValSize = Flags.getByValSize();
6809
6810 // Nothing to do for zero-sized ByVals on the caller side.
6811 if (!ByValSize) {
6812 ++I;
6813 continue;
6814 }
6815
6816 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
6817 return DAG.getExtLoad(
6818 ISD::ZEXTLOAD, dl, PtrVT, Chain,
6819 (LoadOffset != 0)
6821 : Arg,
6822 MachinePointerInfo(), VT);
6823 };
6824
6825 unsigned LoadOffset = 0;
6826
6827 // Initialize registers, which are fully occupied by the by-val argument.
6828 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
6830 MemOpChains.push_back(Load.getValue(1));
6832 const CCValAssign &ByValVA = ArgLocs[I++];
6833 assert(ByValVA.getValNo() == ValNo &&
6834 "Unexpected location for pass-by-value argument.");
6835 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
6836 }
6837
6838 if (LoadOffset == ByValSize)
6839 continue;
6840
6841 // There must be one more loc to handle the remainder.
6842 assert(ArgLocs[I].getValNo() == ValNo &&
6843 "Expected additional location for by-value argument.");
6844
6845 if (ArgLocs[I].isMemLoc()) {
6846 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
6847 const CCValAssign &ByValVA = ArgLocs[I++];
6849 // Only memcpy the bytes that don't pass in register.
6850 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
6851 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
6852 (LoadOffset != 0)
6854 : Arg,
6855 DAG.getObjectPtrOffset(dl, StackPtr,
6856 TypeSize::Fixed(ByValVA.getLocMemOffset())),
6857 CallSeqStart, MemcpyFlags, DAG, dl);
6858 continue;
6859 }
6860
6861 // Initialize the final register residue.
6862 // Any residue that occupies the final by-val arg register must be
6863 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
6864 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
6865 // 2 and 1 byte loads.
6866 const unsigned ResidueBytes = ByValSize % PtrByteSize;
6868 "Unexpected register residue for by-value argument.");
6870 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
6871 const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
6872 const MVT VT =
6873 N == 1 ? MVT::i8
6874 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
6876 MemOpChains.push_back(Load.getValue(1));
6877 LoadOffset += N;
6878 Bytes += N;
6879
6880 // By-val arguments are passed left-justified in register.
6881 // Every load here needs to be shifted, otherwise a full register load
6882 // should have been used.
6883 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
6884 "Unexpected load emitted during handling of pass-by-value "
6885 "argument.");
6886 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
6888 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
6891 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
6894 : ShiftedLoad;
6895 }
6896
6897 const CCValAssign &ByValVA = ArgLocs[I++];
6898 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
6899 continue;
6900 }
6901
6902 CCValAssign &VA = ArgLocs[I++];
6903 const MVT LocVT = VA.getLocVT();
6904 const MVT ValVT = VA.getValVT();
6905
6906 if (VA.isMemLoc() && VA.getValVT().isVector())
6908 "passing vector parameters to the stack is unimplemented for AIX");
6909
6910 switch (VA.getLocInfo()) {
6911 default:
6912 report_fatal_error("Unexpected argument extension type.");
6913 case CCValAssign::Full:
6914 break;
6915 case CCValAssign::ZExt:
6916 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6917 break;
6918 case CCValAssign::SExt:
6919 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6920 break;
6921 }
6922
6923 if (VA.isRegLoc() && !VA.needsCustom()) {
6924 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6925 continue;
6926 }
6927
6928 if (VA.isMemLoc()) {
6929 SDValue PtrOff =
6930 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
6931 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6932 MemOpChains.push_back(
6933 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6934
6935 continue;
6936 }
6937
6938 // Custom handling is used for GPR initializations for vararg float
6939 // arguments.
6940 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
6941 ValVT.isFloatingPoint() && LocVT.isInteger() &&
6942 "Unexpected register handling for calling convention.");
6943
6946
6947 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
6948 // f32 in 32-bit GPR
6949 // f64 in 64-bit GPR
6950 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
6951 else if (Arg.getValueType().getFixedSizeInBits() <
6952 LocVT.getFixedSizeInBits())
6953 // f32 in 64-bit GPR.
6954 RegsToPass.push_back(std::make_pair(
6955 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
6956 else {
6957 // f64 in two 32-bit GPRs
6958 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
6959 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
6960 "Unexpected custom register for argument!");
6961 CCValAssign &GPR1 = VA;
6963 DAG.getConstant(32, dl, MVT::i8));
6964 RegsToPass.push_back(std::make_pair(
6965 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
6966
6967 if (I != E) {
6968 // If only 1 GPR was available, there will only be one custom GPR and
6969 // the argument will also pass in memory.
// The following location is the second half of this f64 only when it is a
// register location for the same value number. The previous check compared
// PeekArg's value number with itself, which is always true and so would
// also accept a register location belonging to the next argument.
6971 if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
6972 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
6973 CCValAssign &GPR2 = ArgLocs[I++];
6974 RegsToPass.push_back(std::make_pair(
6975 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
6976 }
6977 }
6978 }
6979 }
6980
6981 if (!MemOpChains.empty())
6983
6984 // For indirect calls, we need to save the TOC base to the stack for
6985 // restoration after the call.
6986 if (CFlags.IsIndirect) {
6987 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
6988 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
6990 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
6991 const unsigned TOCSaveOffset =
6992 Subtarget.getFrameLowering()->getTOCSaveOffset();
6993
6994 setUsesTOCBasePtr(DAG);
6995 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
6996 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6998 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6999 Chain = DAG.getStore(
7000 Val.getValue(1), dl, Val, AddPtr,
7001 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7002 }
7003
7004 // Build a sequence of copy-to-reg nodes chained together with token chain
7005 // and flag operands which copy the outgoing args into the appropriate regs.
7007 for (auto Reg : RegsToPass) {
7008 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7009 InFlag = Chain.getValue(1);
7010 }
7011
7012 const int SPDiff = 0;
7013 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7014 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7015}
7016
// Returns the result of CCState::CheckReturn for the return values described
// by Outs. The assignment function passed to CheckReturn is selected by
// whether the subtarget uses the SVR4 ABI and CallConv is Cold; RetCC_PPC is
// used otherwise (the alternative is named on a line not present in this
// listing).
7017bool
7018PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7019 MachineFunction &MF, bool isVarArg,
7021 LLVMContext &Context) const {
7023 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7024 return CCInfo.CheckReturn(
7025 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7027 : RetCC_PPC);
7028}
7029
// Lowers a function return. Return-value locations are assigned with
// CCState::AnalyzeReturn (all of them must be registers), each value is
// extended as its location requires and copied into the assigned register,
// and a PPCISD::RET_FLAG node is built from the chain, the return registers
// and the final glue value. On SPE subtargets an f64 value is returned as two
// pieces in two consecutive locations.
7030SDValue
7031PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7032 bool isVarArg,
7034 const SmallVectorImpl<SDValue> &OutVals,
7035 const SDLoc &dl, SelectionDAG &DAG) const {
7037 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7038 *DAG.getContext());
7039 CCInfo.AnalyzeReturn(Outs,
7040 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7042 : RetCC_PPC);
7043
7044 SDValue Flag;
7046
7047 // Copy the result values into the output registers.
// RealResIdx indexes OutVals and advances once per returned value, while i
// indexes RVLocs and may advance twice for a value split across two
// locations (see the SPE f64 case below).
7048 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7049 CCValAssign &VA = RVLocs[i];
7050 assert(VA.isRegLoc() && "Can only return in registers!");
7051
7052 SDValue Arg = OutVals[RealResIdx];
7053
7054 switch (VA.getLocInfo()) {
7055 default: llvm_unreachable("Unknown loc info!");
7056 case CCValAssign::Full: break;
7057 case CCValAssign::AExt:
7058 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7059 break;
7060 case CCValAssign::ZExt:
7061 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7062 break;
7063 case CCValAssign::SExt:
7064 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7065 break;
7066 }
7067 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7068 bool isLittleEndian = Subtarget.isLittleEndian();
7069 // Legalize ret f64 -> ret 2 x i32.
7070 SDValue SVal =
7072 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7073 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7074 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7076 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7077 Flag = Chain.getValue(1);
// NOTE(review): VA is a reference bound to the previous RVLocs element, so
// this assignment copies the next location over that element instead of
// rebinding VA. Only VA's register and type are read afterwards. Confirm
// that overwriting the earlier entry is intended.
7078 VA = RVLocs[++i]; // skip ahead to next loc
7079 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7080 } else
7081 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7082 Flag = Chain.getValue(1);
7083 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7084 }
7085
7086 RetOps[0] = Chain; // Update chain.
7087
7088 // Add the flag if we have it.
7089 if (Flag.getNode())
7090 RetOps.push_back(Flag);
7091
7092 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7093}
7094
7095SDValue
7096PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7097 SelectionDAG &DAG) const {
7098 SDLoc dl(Op);
7099
7100 // Get the correct type for integers.
7101 EVT IntVT = Op.getValueType();
7102
7103 // Get the inputs.
7104 SDValue Chain = Op.getOperand(0);
7105 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7106 // Build a DYNAREAOFFSET node.
7107 SDValue Ops[2] = {Chain, FPSIdx};
7108 SDVTList VTs = DAG.getVTList(IntVT);
7109 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7110}
7111
// Lowers a stack restore. The word stored at the current stack pointer (the
// link to the old SP) is loaded, the saved stack pointer (operand 1 of Op) is
// copied into the stack pointer register, and the loaded link is then stored
// at the restored stack pointer. Returns the chain of that final store.
7112SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7113 SelectionDAG &DAG) const {
7114 // When we pop the dynamic allocation we need to restore the SP link.
7115 SDLoc dl(Op);
7116
7117 // Get the correct type for pointers.
7119
7120 // Construct the stack pointer operand.
7121 bool isPPC64 = Subtarget.isPPC64();
7122 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7123 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7124
7125 // Get the operands for the STACKRESTORE.
7126 SDValue Chain = Op.getOperand(0);
7127 SDValue SaveSP = Op.getOperand(1);
7128
7129 // Load the old link SP.
7131 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7132
7133 // Restore the stack pointer.
7134 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7135
7136 // Store the old link SP.
7137 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7138}
7139
// Returns a frame-index node for the return address save slot. The index
// cached in the function info is used when it is non-zero; otherwise a fixed
// stack object of pointer size is created at the frame lowering's return
// save offset.
7140SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7142 bool isPPC64 = Subtarget.isPPC64();
7144
7145 // Get the current return address save index.
7146 // A value of zero is treated below as "not created yet".
7148 int RASI = FI->getReturnAddrSaveIndex();
7149
7150 // If the return address save index hasn't been defined yet.
7151 if (!RASI) {
7152 // Find out the fixed offset of the return address save area.
7153 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7154 // Allocate the frame index for the return address save area.
7155 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7156 // Save the result.
7158 }
7159 return DAG.getFrameIndex(RASI, PtrVT);
7160}
7161
// Returns a frame-index node for the frame pointer save slot. The index
// cached in the function info is used when it is non-zero; otherwise a fixed
// stack object of pointer size is created at the frame lowering's frame
// pointer save offset.
7162SDValue
7163PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7165 bool isPPC64 = Subtarget.isPPC64();
7167
7168 // Get current frame pointer save index. The users of this index will be
7169 // primarily DYNALLOC instructions.
7171 int FPSI = FI->getFramePointerSaveIndex();
7172
7173 // If the frame pointer save index hasn't been defined yet.
7174 if (!FPSI) {
7175 // Find out the fixed offset of the frame pointer save area.
7176 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7177 // Allocate the frame index for the frame pointer save area.
7178 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7179 // Save the result.
7181 }
7182 return DAG.getFrameIndex(FPSI, PtrVT);
7183}
7184
// Lowers a dynamic stack allocation. Operand 0 of Op is the chain and operand
// 1 the requested size. The result is a PPCISD::PROBED_ALLOCA node when
// hasInlineStackProbe(MF) holds and a PPCISD::DYNALLOC node otherwise; both
// take { Chain, NegSize, FPSIdx } and produce (PtrVT, MVT::Other).
// NOTE(review): embedded listing lines 7187, 7194 and 7196 are absent from
// this text, so the declarations of MF and PtrVT and the beginning of the
// NegSize expression are not visible here.
7185SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7186 SelectionDAG &DAG) const {
7188 // Get the inputs.
7189 SDValue Chain = Op.getOperand(0);
7190 SDValue Size = Op.getOperand(1);
7191 SDLoc dl(Op);
7192
7193 // Get the correct type for pointers.
7195 // Negate the size.
7197 DAG.getConstant(0, dl, PtrVT), Size);
7198 // Construct a node for the frame pointer save index.
7199 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7200 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7201 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7202 // Select the probing variant of the allocation node when inline stack
7203 // probing applies to this function.
7202 if (hasInlineStackProbe(MF))
7203 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7204 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7205}
7206
// Lowers EH_DWARF_CFA to a frame-index node of type PtrVT that refers to a
// newly created fixed object at offset 0 (8 bytes on PPC64, 4 otherwise).
// NOTE(review): embedded listing lines 7209 and 7212 are absent from this
// text, so the declarations of MF and PtrVT are not visible here.
7207SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7208 SelectionDAG &DAG) const {
7210
7211 bool isPPC64 = Subtarget.isPPC64();
7213
7214 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7215 return DAG.getFrameIndex(FI, PtrVT);
7216}
7217
// Rewrites Op as a PPCISD::EH_SJLJ_SETJMP node, forwarding operands 0 and 1
// of Op unchanged.
// NOTE(review): embedded listing line 7222 (the result-type argument of the
// getNode call) is absent from this text.
7218SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7219 SelectionDAG &DAG) const {
7220 SDLoc DL(Op);
7221 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7223 Op.getOperand(0), Op.getOperand(1));
7224}
7225
// Rewrites Op as a target node that receives operands 0 and 1 of Op.
// NOTE(review): embedded listing line 7229 (the start of the return
// statement, including the opcode and result type) is absent from this text;
// presumably it builds a PPCISD::EH_SJLJ_LONGJMP node, mirroring
// lowerEH_SJLJ_SETJMP -- confirm against the real source.
7226SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7227 SelectionDAG &DAG) const {
7228 SDLoc DL(Op);
7230 Op.getOperand(0), Op.getOperand(1));
7231}
7232
// Custom-lowers a load. Vector loads are delegated to LowerVectorLoad; the
// only other case accepted (enforced by the assert) is a load producing i1.
// Returns the merged pair { Result, chain result of the new extending load }.
// NOTE(review): embedded listing lines 7243 and 7252 are absent from this
// text, so the definitions of LD and Result are not visible here.
7233SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7234 if (Op.getValueType().isVector())
7235 return LowerVectorLoad(Op, DAG);
7236
7237 assert(Op.getValueType() == MVT::i1 &&
7238 "Custom lowering only for i1 loads");
7239
7240 // First, load 8 bits with an extending load whose result has the pointer
7240 // type, then (presumably via Result) truncate to 1 bit.
7241
7242 SDLoc dl(Op);
7244
7245 SDValue Chain = LD->getChain();
7246 SDValue BasePtr = LD->getBasePtr();
7247 MachineMemOperand *MMO = LD->getMemOperand();
7248
7249 SDValue NewLD =
7250 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7251 BasePtr, MVT::i8, MMO);
7253
7254 // Value 1 of the new load node is its output chain.
7254 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7255 return DAG.getMergeValues(Ops, dl);
7256}
7257
// Custom-lowers a store. Stores of vector values are delegated to
// LowerVectorStore; the only other case accepted (enforced by the assert) is
// a store of an i1 value, which is emitted as a truncating store of 8 bits
// that reuses the original chain, base pointer and memory operand.
// NOTE(review): embedded listing lines 7268 and 7275 are absent from this
// text, so the definition of ST and the start of the statement that widens
// Value are not visible here.
7258SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7259 if (Op.getOperand(1).getValueType().isVector())
7260 return LowerVectorStore(Op, DAG);
7261
7262 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7263 "Custom lowering only for i1 stores");
7264
7265 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7266
7267 SDLoc dl(Op);
7269
7270 SDValue Chain = ST->getChain();
7271 SDValue BasePtr = ST->getBasePtr();
7272 SDValue Value = ST->getValue();
7273 MachineMemOperand *MMO = ST->getMemOperand();
7274
7276 Value);
7277 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7278}
7279
// Custom-lowers a truncate whose result type is i1 (enforced by the assert)
// to a PPCISD::ANDI_rec_1_GT_BIT node applied to operand 0 of Op.
7280// FIXME: Remove this once the ANDI glue bug is fixed:
7281SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7282 assert(Op.getValueType() == MVT::i1 &&
7283 "Custom lowering only for i1 results");
7284
7285 SDLoc DL(Op);
7286 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7287}
7288
// Lowers a vector truncate to a single vector shuffle of type WideVT, or
// returns an empty SDValue when the shapes are not handled (see the early
// returns below).
// NOTE(review): embedded listing lines 7331, 7336, 7340, 7342, 7343 and 7352
// are absent from this text, so the definitions of WideVT, VecIdxTy and
// ShuffV and the statements that assign Op1/Op2 for a 256-bit source are not
// visible here.
7289SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7290 SelectionDAG &DAG) const {
7291
7292 // Implements a vector truncate that fits in a vector register as a shuffle.
7293 // We want to legalize vector truncates down to where the source fits in
7294 // a vector register (and target is therefore smaller than vector register
7295 // size). At that point legalization will try to custom lower the sub-legal
7296 // result and get here - where we can contain the truncate as a single target
7297 // operation.
7298
7299 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7300 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7301 //
7302 // We will implement it for big-endian ordering as this (where x denotes
7303 // undefined):
7304 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7305 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7306 //
7307 // The same operation in little-endian ordering will be:
7308 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7309 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7310
7311 EVT TrgVT = Op.getValueType();
7312 assert(TrgVT.isVector() && "Vector type expected.");
7313 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7314 EVT EltVT = TrgVT.getVectorElementType();
7315 // Bail out unless the operation is marked custom for the target type, the
7315 // target fits in 128 bits, and both its element count and element width
7315 // are powers of two.
7315 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7316 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7317 !isPowerOf2_32(EltVT.getSizeInBits()))
7318 return SDValue();
7319
7320 SDValue N1 = Op.getOperand(0);
7321 EVT SrcVT = N1.getValueType();
7322 unsigned SrcSize = SrcVT.getSizeInBits();
7323 // The source may be at most 256 bits wide, with power-of-two element count
7323 // and element width.
7323 if (SrcSize > 256 ||
7324 !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7325 !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
7326 return SDValue();
7327 // A 256-bit source needs at least two elements (it is split in the
7327 // SrcSize == 256 branch below).
7327 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7328 return SDValue();
7329
7330 // Number of target-sized elements that fit in 128 bits.
7330 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7332
7333 SDLoc DL(Op);
7334 SDValue Op1, Op2;
7335 if (SrcSize == 256) {
7337 EVT SplitVT =
7338 N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7339 unsigned SplitNumElts = SplitVT.getVectorNumElements();
7341 DAG.getConstant(0, DL, VecIdxTy));
7344 }
7345 else {
7346 // A source narrower than 128 bits is widened first; the second shuffle
7346 // operand is undefined in this case.
7346 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7347 Op2 = DAG.getUNDEF(WideVT);
7348 }
7349
7350 // First list the elements we want to keep.
7351 // SizeMult is the ratio of source size to target size in bits; on little
7351 // endian the first of every SizeMult lanes is kept, on big endian the last.
7351 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7353 if (Subtarget.isLittleEndian())
7354 for (unsigned i = 0; i < TrgNumElts; ++i)
7355 ShuffV.push_back(i * SizeMult);
7356 else
7357 for (unsigned i = 1; i <= TrgNumElts; ++i)
7358 ShuffV.push_back(i * SizeMult - 1);
7359
7360 // Populate the remaining elements with undefs.
7360 // NOTE(review): the index pushed is the constant WideNumElts + 1, which
7360 // presumably addresses lane 1 of the second shuffle operand -- confirm.
7361 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7362 // ShuffV.push_back(i + WideNumElts);
7363 ShuffV.push_back(WideNumElts + 1);
7364
7365 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7366 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7367 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7368}
7369
7370/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
7371/// possible.
///
/// Operands of Op as read below: 0 and 1 are the compared values (LHS, RHS),
/// 2 and 3 the values selected between (TV, FV), 4 the condition code.
/// Op itself is returned unchanged whenever no fsel/min/max form applies.
///
/// NOTE(review): embedded listing lines 7418, 7430, 7439 and 7453 (each
/// directly after a std::swap and before the next case label) and lines 7424
/// and 7460 (the statement guarded by the Sel1 f32 check) are absent from
/// this text. The missing lines after std::swap are presumably fall-through
/// markers -- confirm against the real source.
7372SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7373 // Not FP, or using SPE? Not a fsel.
7374 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7375 !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
7376 return Op;
7377
7378 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7379
7380 EVT ResVT = Op.getValueType();
7381 EVT CmpVT = Op.getOperand(0).getValueType();
7382 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7383 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7384 SDLoc dl(Op);
7385 SDNodeFlags Flags = Op.getNode()->getFlags();
7386
7387 // We have xsmaxcdp/xsmincdp which are OK to emit even in the
7388 // presence of infinities.
7388 // This applies only when the selected values are the compared values
7388 // themselves (LHS == TV and RHS == FV).
7389 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7390 switch (CC) {
7391 default:
7392 break;
7393 case ISD::SETOGT:
7394 case ISD::SETGT:
7395 return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
7396 case ISD::SETOLT:
7397 case ISD::SETLT:
7398 return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
7399 }
7400 }
7401
7402 // We might be able to do better than this under some circumstances, but in
7403 // general, fsel-based lowering of select is a finite-math-only optimization.
7404 // For more information, see section F.3 of the 2.06 ISA specification.
7405 // With ISA 3.0
7405 // Both no-infs and no-NaNs must hold, each either through the target
7405 // options or through the flags on this node; otherwise give up.
7406 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
7407 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
7408 return Op;
7409
7410 // If the RHS of the comparison is a 0.0, we don't need to do the
7411 // subtraction at all.
7412 SDValue Sel1;
7413 if (isFloatingPointZero(RHS))
7414 switch (CC) {
7415 default: break; // SETUO etc aren't handled by fsel.
7416 case ISD::SETNE:
7417 std::swap(TV, FV);
7419 case ISD::SETEQ:
7420 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7421 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7422 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7423 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7425 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7426 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7427 case ISD::SETULT:
7428 case ISD::SETLT:
7429 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7431 case ISD::SETOGE:
7432 case ISD::SETGE:
7433 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7434 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7435 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7436 case ISD::SETUGT:
7437 case ISD::SETGT:
7438 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7440 case ISD::SETOLE:
7441 case ISD::SETLE:
7442 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7443 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7444 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7445 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7446 }
7447
7447 // General case: feed fsel with a difference of the compared values
7447 // (LHS - RHS or RHS - LHS, depending on the condition code).
7448 SDValue Cmp;
7449 switch (CC) {
7450 default: break; // SETUO etc aren't handled by fsel.
7451 case ISD::SETNE:
7452 std::swap(TV, FV);
7454 case ISD::SETEQ:
7455 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7456 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7457 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7458 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7459 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7461 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7462 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7463 case ISD::SETULT:
7464 case ISD::SETLT:
7465 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7466 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7467 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7468 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7469 case ISD::SETOGE:
7470 case ISD::SETGE:
7471 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7472 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7473 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7474 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7475 case ISD::SETUGT:
7476 case ISD::SETGT:
7477 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7478 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7479 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7480 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7481 case ISD::SETOLE:
7482 case ISD::SETLE:
7483 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7484 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7485 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7486 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7487 }
7488 return Op;
7489}
7490
// Maps a PPCISD conversion opcode to its STRICT_ counterpart. Any opcode
// without a case below reaches llvm_unreachable.
// NOTE(review): embedded listing lines 7500, 7502 and 7510 are absent from
// this text, so the return statements for FCTIDUZ, FCTIWUZ and FCFIDUS are
// not visible here.
7491static unsigned getPPCStrictOpcode(unsigned Opc) {
7492 switch (Opc) {
7493 default:
7494 llvm_unreachable("No strict version of this opcode!");
7495 case PPCISD::FCTIDZ:
7496 return PPCISD::STRICT_FCTIDZ;
7497 case PPCISD::FCTIWZ:
7498 return PPCISD::STRICT_FCTIWZ;
7499 case PPCISD::FCTIDUZ:
7501 case PPCISD::FCTIWUZ:
7503 case PPCISD::FCFID:
7504 return PPCISD::STRICT_FCFID;
7505 case PPCISD::FCFIDU:
7506 return PPCISD::STRICT_FCFIDU;
7507 case PPCISD::FCFIDS:
7508 return PPCISD::STRICT_FCFIDS;
7509 case PPCISD::FCFIDUS:
7511 }
7512}
7513
// Builds the PPCISD floating-point-to-integer conversion node for Op and
// returns it. The node has type f64 (plus MVT::Other for strict opcodes, in
// which case value 1 of the result is the chain).
// NOTE(review): embedded listing lines 7514, 7528 and 7532 are absent from
// this text: the first line of this signature (callers invoke it as
// convertFPToInt(Op, DAG, Subtarget)), one line before the f32 check, and the
// start of the strict extension call assigned to Src.
7515 const PPCSubtarget &Subtarget) {
7516 SDLoc dl(Op);
7517 bool IsStrict = Op->isStrictFPOpcode();
7518 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7519 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7520
7521 // TODO: Any other flags to propagate?
7522 SDNodeFlags Flags;
7523 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7524
7525 // For strict nodes, source is the second operand.
7526 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7527 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7529 // An f32 source is first extended to f64; in the strict case the chain is
7529 // threaded through the extension node.
7529 if (Src.getValueType() == MVT::f32) {
7530 if (IsStrict) {
7531 Src =
7533 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
7534 Chain = Src.getValue(1);
7535 } else
7536 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7537 }
7538 SDValue Conv;
7539 unsigned Opc = ISD::DELETED_NODE;
7540 // Pick the conversion opcode from the integer result type and signedness.
7540 switch (Op.getSimpleValueType().SimpleTy) {
7541 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7542 case MVT::i32:
7542 // Unsigned i32 without FPCVT uses the signed 64-bit conversion FCTIDZ.
7543 Opc = IsSigned ? PPCISD::FCTIWZ
7544 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
7545 break;
7546 case MVT::i64:
7547 assert((IsSigned || Subtarget.hasFPCVT()) &&
7548 "i64 FP_TO_UINT is supported only with FPCVT");
7549 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
7550 }
7551 if (IsStrict) {
7552 Opc = getPPCStrictOpcode(Opc);
7553 Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
7554 {Chain, Src}, Flags);
7555 } else {
7556 Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
7557 }
7558 return Conv;
7559}
7560
// Converts the FP operand of Op with convertFPToInt, stores the converted
// value to a stack slot, and fills RLI (Chain, Ptr, MPI, Alignment) with what
// a caller needs to load the integer result back from that slot. No load is
// created here.
// NOTE(review): embedded listing lines 7572, 7575 and 7581 are absent from
// this text, so the definitions of FIPtr and MF and the initializer of MPI
// are not visible here.
7561void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
7562 SelectionDAG &DAG,
7563 const SDLoc &dl) const {
7564 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
7565 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7566 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7567 bool IsStrict = Op->isStrictFPOpcode();
7568
7569 // Convert the FP value to an int value through memory.
7569 // i32Stack: an i32 result that can be stored with STFIWX (needs STFIWX,
7569 // and FPCVT as well for the unsigned case).
7570 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
7571 (IsSigned || Subtarget.hasFPCVT());
7573 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
7574 MachinePointerInfo MPI =
7576
7577 // Emit a store to the stack slot.
7577 // For strict opcodes the store is chained after the conversion node.
7578 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
7579 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
7580 if (i32Stack) {
7582 Alignment = Align(4);
7583 MachineMemOperand *MMO =
7584 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
7585 SDValue Ops[] = { Chain, Tmp, FIPtr };
7586 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
7587 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
7588 } else
7589 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
7590
7591 // Result is a load from the stack slot. If loading 4 bytes, make sure to
7592 // add in a bias on big endian.
7592 // NOTE(review): the pointer is advanced by 4 unconditionally, while the
7592 // MPI offset is 4 only on big endian and 0 on little endian. Confirm that
7592 // little-endian subtargets never reach this branch.
7593 if (Op.getValueType() == MVT::i32 && !i32Stack) {
7594 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
7595 DAG.getConstant(4, dl, FIPtr.getValueType()));
7596 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
7597 }
7598
7599 RLI.Chain = Chain;
7600 RLI.Ptr = FIPtr;
7601 RLI.MPI = MPI;
7602 RLI.Alignment = Alignment;
7603}
7604
7605/// Custom lowers floating point to integer conversions to use
7606/// the direct move instructions available in ISA 2.07 to avoid the
7607/// need for load/store combinations.
///
/// The conversion node from convertFPToInt is wrapped in a PPCISD::MFVSR of
/// Op's value type. For strict opcodes the result is merged with the
/// conversion's chain (value 1 of Conv).
7608SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
7609 SelectionDAG &DAG,
7610 const SDLoc &dl) const {
7611 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
7612 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
7613 if (Op->isStrictFPOpcode())
7614 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
7615 else
7616 return Mov;
7617}
7618
// Custom-lowers FP_TO_SINT / FP_TO_UINT and their strict variants.
//  - f128 source: Op is returned as is with P9 vector support, otherwise an
//    empty SDValue.
//  - ppcf128 source: expanded by hand for an i32 result; any other result
//    type yields an empty SDValue.
//  - otherwise: a direct move when the subtarget has direct moves and is
//    PPC64, else a store/load through a stack slot prepared by
//    LowerFP_TO_INTForReuse.
// NOTE(review): embedded listing lines 7643, 7645, 7651, 7652, 7655, 7663,
// 7673, 7675, 7682, 7685, 7688, 7689 and 7694 are absent from this text, so
// the definitions of Lo, Hi, Res, APF, SInt and Result, the initializers of
// SetCCVT and DstSetCCVT, and parts of several getNode calls are not visible.
7619SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
7620 const SDLoc &dl) const {
7621 bool IsStrict = Op->isStrictFPOpcode();
7622 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7623 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7624 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7625 EVT SrcVT = Src.getValueType();
7626 EVT DstVT = Op.getValueType();
7627
7628 // FP to INT conversions are legal for f128.
7629 if (SrcVT == MVT::f128)
7630 return Subtarget.hasP9Vector() ? Op : SDValue();
7631
7632 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7633 // PPC (the libcall is not available).
7634 if (SrcVT == MVT::ppcf128) {
7635 if (DstVT == MVT::i32) {
7636 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
7637 // set other fast-math flags to FP operations in both strict and
7638 // non-strict cases. (FP_TO_SINT, FSUB)
7639 SDNodeFlags Flags;
7640 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7641
7642 if (IsSigned) {
7644 DAG.getIntPtrConstant(0, dl));
7646 DAG.getIntPtrConstant(1, dl));
7647
7648 // Add the two halves of the long double in round-to-zero mode, and use
7649 // a smaller FP_TO_SINT.
7650 if (IsStrict) {
7653 {Op.getOperand(0), Lo, Hi}, Flags);
7654 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
7656 {Res.getValue(1), Res}, Flags);
7657 } else {
7658 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7659 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
7660 }
7661 } else {
7662 // Bit pattern 0x41e0000000000000 is the IEEE double 2^31; the second
7662 // word is zero.
7662 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
7664 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
7665 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
7666 if (IsStrict) {
7667 // Sel = Src < 0x80000000
7668 // FltOfs = select Sel, 0.0, 0x80000000
7669 // IntOfs = select Sel, 0, 0x80000000
7670 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
7671 SDValue Chain = Op.getOperand(0);
7672 EVT SetCCVT =
7674 EVT DstSetCCVT =
7676 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
7677 Chain, true);
7678 Chain = Sel.getValue(1);
7679
7680 SDValue FltOfs = DAG.getSelect(
7681 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
7683
7684 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
7686 {Chain, Src, FltOfs}, Flags);
7687 Chain = Val.getValue(1);
7690 {Chain, Val}, Flags);
7691 Chain = SInt.getValue(1);
7692 SDValue IntOfs = DAG.getSelect(
7693 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
7695 return DAG.getMergeValues({Result, Chain}, dl);
7696 } else {
7697 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7698 // FIXME: generated code sucks.
7699 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
7700 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
7701 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
7702 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
7703 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
7704 }
7705 }
7706 }
7707
7707 // ppcf128 to anything other than i32 is not handled here.
7708 return SDValue();
7709 }
7710
7711 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
7712 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
7713
7713 // Otherwise go through memory: store the converted value, then load it
7713 // back as the integer result type.
7714 ReuseLoadInfo RLI;
7715 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7716
7717 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7718 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7719}
7720
7721// We're trying to insert a regular store, S, and then a load, L. If the
7722// incoming value, O, is a load, we might just be able to have our load use the
7723// address used by O. However, we don't know if anything else will store to
7724// that address before we can load from it. To prevent this situation, we need
7725// to insert our load, L, into the chain as a peer of O. To do this, we give L
7726// the same chain operand as O, we create a token factor from the chain results
7727// of O and L, and we replace all uses of O's chain result with that token
7728// factor (see spliceIntoChain below for this last part).
//
// Returns true and fills RLI when the value Op can be re-read from memory:
// either Op is a non-strict FP-to-int conversion (handled through
// LowerFP_TO_INTForReuse) or a suitable load whose address can be reused.
// Returns false otherwise, including for every strict FP opcode.
// NOTE(review): embedded listing line 7749 is absent from this text, so the
// definition of LD is not visible here.
7729bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
7730 ReuseLoadInfo &RLI,
7731 SelectionDAG &DAG,
7732 ISD::LoadExtType ET) const {
7733 // Conservatively skip reusing for constrained FP nodes.
7734 if (Op->isStrictFPOpcode())
7735 return false;
7736
7737 SDLoc dl(Op);
7737 // FP_TO_UINT qualifies with FPCVT, or without it when the result is i32.
7738 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
7739 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
7740 if (ET == ISD::NON_EXTLOAD &&
7741 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
7742 isOperationLegalOrCustom(Op.getOpcode(),
7743 Op.getOperand(0).getValueType())) {
7744
7745 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7746 return true;
7747 }
7748
7748 // Reject a missing load, one with a different extension kind, and
7748 // volatile or non-temporal loads.
7750 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
7751 LD->isNonTemporal())
7752 return false;
7753 if (LD->getMemoryVT() != MemVT)
7754 return false;
7755
7756 // If the result of the load is an illegal type, then we can't build a
7757 // valid chain for reuse since the legalised loads and token factor node that
7758 // ties the legalised loads together uses a different output chain than the
7759 // illegal load.
7760 if (!isTypeLegal(LD->getValueType(0)))
7761 return false;
7762
7763 RLI.Ptr = LD->getBasePtr();
7763 // For an indexed load with a defined offset, the reused address is
7763 // base + offset; only pre-increment addressing is expected (asserted).
7764 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
7765 assert(LD->getAddressingMode() == ISD::PRE_INC &&
7766 "Non-pre-inc AM on PPC?");
7767 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
7768 LD->getOffset());
7769 }
7770
7770 // Copy the memory properties of the original load into RLI.
7771 RLI.Chain = LD->getChain();
7772 RLI.MPI = LD->getPointerInfo();
7773 RLI.IsDereferenceable = LD->isDereferenceable();
7774 RLI.IsInvariant = LD->isInvariant();
7775 RLI.Alignment = LD->getAlign();
7776 RLI.AAInfo = LD->getAAInfo();
7777 RLI.Ranges = LD->getRanges();
7778
7778 // The chain result is value 2 of an indexed load and value 1 otherwise.
7779 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
7780 return true;
7781}
7782
7783// Given the head of the old chain, ResChain, insert a token factor containing
7784// it and NewResChain, and make users of ResChain now be users of that token
7785// factor.
7786// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
//
// Does nothing when ResChain is empty.
// NOTE(review): embedded listing lines 7788, 7795 and 7796 are absent from
// this text, so the NewResChain parameter declaration and the statement that
// creates TF are not visible here.
7787void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
7789 SelectionDAG &DAG) const {
7790 if (!ResChain)
7791 return;
7792
7793 SDLoc dl(NewResChain);
7794
7797 assert(TF.getNode() != NewResChain.getNode() &&
7798 "A new TF really is required here");
7799
7799 // Redirect every user of ResChain to TF first, then set the operands of
7799 // TF itself to (ResChain, NewResChain).
7800 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
7801 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
7802}
7803
7804/// Analyze profitability of direct move
7805/// prefer float load to int load plus direct move
7806/// when there is no integer use of int load
///
/// Returns true when operand 0 of Op is not a load, when the subtarget lacks
/// P9 vector support and the load is at most 2 bytes wide, or when the loaded
/// value has at least one user that is not an int-to-FP conversion. Returns
/// false only when every user of the loaded value is such a conversion.
7807bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
7808 SDNode *Origin = Op.getOperand(0).getNode();
7809 if (Origin->getOpcode() != ISD::LOAD)
7810 return true;
7811
7812 // If there is no LXSIBZX/LXSIHZX, like Power8,
7813 // prefer direct move if the memory size is 1 or 2 bytes.
7814 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
7815 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
7816 return true;
7817
7818 for (SDNode::use_iterator UI = Origin->use_begin(),
7819 UE = Origin->use_end();
7820 UI != UE; ++UI) {
7821
7822 // Only look at the users of the loaded value.
7823 if (UI.getUse().get().getResNo() != 0)
7824 continue;
7825
7825 // Any user other than a (strict) signed/unsigned int-to-FP conversion
7825 // makes the direct move profitable.
7826 if (UI->getOpcode() != ISD::SINT_TO_FP &&
7827 UI->getOpcode() != ISD::UINT_TO_FP &&
7828 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
7829 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
7830 return true;
7831 }
7832
7833 return false;
7834}
7835
// Builds the PPCISD integer-to-floating-point conversion node for Op applied
// to Src and returns it. For strict opcodes the node also produces a chain,
// and Chain defaults to operand 0 of Op when the caller passes none.
// NOTE(review): embedded listing lines 7836 and 7852 are absent from this
// text: the first line of this signature (callers invoke it as
// convertIntToFP(Op, Src, DAG, Subtarget[, Chain])) and the definition of
// ConvTy.
7837 const PPCSubtarget &Subtarget,
7838 SDValue Chain = SDValue()) {
7839 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
7840 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7841 SDLoc dl(Op);
7842
7843 // TODO: Any other flags to propagate?
7844 SDNodeFlags Flags;
7845 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7846
7847 // If we have FCFIDS, then use it when converting to single-precision.
7848 // Otherwise, convert to double-precision and then round.
7849 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
7850 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
7851 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
7853 if (Op->isStrictFPOpcode()) {
7854 if (!Chain)
7855 Chain = Op.getOperand(0);
7856 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
7857 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
7858 } else
7859 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
7860}
7861
7862/// Custom lowers integer to floating point conversions to use
7863/// the direct move instructions available in ISA 2.07 to avoid the
7864/// need for load/store combinations.
///
/// The integer source is moved into an f64-typed value with MTVSRZ or MTVSRA
/// and then converted by convertIntToFP. The result type must be f32 or f64
/// and the subtarget must have FPCVT (both asserted).
/// NOTE(review): embedded listing line 7874 is absent from this text, so the
/// definition of WordInt is not visible here.
7865SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
7866 SelectionDAG &DAG,
7867 const SDLoc &dl) const {
7868 assert((Op.getValueType() == MVT::f32 ||
7869 Op.getValueType() == MVT::f64) &&
7870 "Invalid floating point type as target of conversion");
7871 assert(Subtarget.hasFPCVT() &&
7872 "Int to FP conversions with direct moves require FPCVT");
7873 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
7875 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
7876 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7877 // MTVSRZ is used only for an unsigned WordInt source; every other case
7877 // uses MTVSRA.
7877 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
7878 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
7879 return convertIntToFP(Op, Mov, DAG, Subtarget);
7880}
7881
// Widens Vec, a vector narrower than 128 bits (asserted), to type WideVT by
// concatenating it with undefined vectors of its own type. Vec becomes
// element 0 of the CONCAT_VECTORS operand list.
// NOTE(review): embedded listing lines 7890 and 7893 are absent from this
// text, so the definitions of WideVT and Ops are not visible here.
7882static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
7883
7884 EVT VecVT = Vec.getValueType();
7885 assert(VecVT.isVector() && "Expected a vector type.");
7886 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
7887
7888 EVT EltVT = VecVT.getVectorElementType();
7889 // Number of elements of this width that fit in 128 bits.
7889 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7891
7892 // Number of VecVT-sized pieces needed to reach WideNumElts elements.
7892 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
7894 Ops[0] = Vec;
7895 SDValue UndefVec = DAG.getUNDEF(VecVT);
7896 for (unsigned i = 1; i < NumConcat; ++i)
7897 Ops[i] = UndefVec;
7898
7899 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
7900}
7901
// Custom-lowers a (strict) SINT_TO_FP / UINT_TO_FP whose result is v2f64 or
// v4f32 (asserted). The source is widened with widenVec, its elements are
// repositioned with a shuffle mask, the value Extend is formed, and the
// original conversion opcode is re-emitted on Extend.
// NOTE(review): embedded listing lines 7908, 7923, 7925, 7938, 7940, 7942,
// 7944, 7950 and 7953 are absent from this text, so the rest of the first
// assert condition, the definitions of IntermediateVT, ShuffV, Arrange and
// Extend, and the statements that build the shuffle and the extension are not
// visible here.
7902SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
7903 const SDLoc &dl) const {
7904 bool IsStrict = Op->isStrictFPOpcode();
7905 unsigned Opc = Op.getOpcode();
7906 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7907 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
7909 "Unexpected conversion type");
7910 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
7911 "Supports conversions to v2f64/v4f32 only.");
7912
7913 // TODO: Any other flags to propagate?
7914 SDNodeFlags Flags;
7915 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7916
7917 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
7918 bool FourEltRes = Op.getValueType() == MVT::v4f32;
7919
7920 SDValue Wide = widenVec(DAG, Src, dl);
7921 EVT WideVT = Wide.getValueType();
7922 unsigned WideNumElts = WideVT.getVectorNumElements();
7924
7926 // Start with every mask entry at i + WideNumElts (an index at or above
7926 // the element count of one shuffle operand).
7926 for (unsigned i = 0; i < WideNumElts; ++i)
7927 ShuffV.push_back(i + WideNumElts);
7928
7929 // Stride is the number of wide lanes per result element; SaveElts is the
7929 // number of source elements that are kept.
7929 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
7930 int SaveElts = FourEltRes ? 4 : 2;
7931 // Source element i goes to the first lane of its stride group on little
7931 // endian and to the last lane of the group on big endian.
7931 if (Subtarget.isLittleEndian())
7932 for (int i = 0; i < SaveElts; i++)
7933 ShuffV[i * Stride] = i;
7934 else
7935 for (int i = 1; i <= SaveElts; i++)
7936 ShuffV[i * Stride - 1] = i - 1;
7937
7939 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
7941
7943 if (SignedConv) {
7945 EVT ExtVT = Src.getValueType();
7946 if (Subtarget.hasP9Altivec())
7947 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
7948 IntermediateVT.getVectorNumElements());
7949
7951 DAG.getValueType(ExtVT));
7952 } else
7954
7955 if (IsStrict)
7956 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
7957 {Op.getOperand(0), Extend}, Flags);
7958
7959 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
7960}
7961
7962SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
7963 SelectionDAG &DAG) const {
7964 SDLoc dl(Op);
7965 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
7966 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7967 bool IsStrict = Op->isStrictFPOpcode();
7968 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7969 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
7970
7971 // TODO: Any other flags to propagate?
7972 SDNodeFlags Flags;
7973 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7974
7975 EVT InVT = Src.getValueType();
7976 EVT OutVT = Op.getValueType();
7977 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
7978 isOperationCustom(Op.getOpcode(), InVT))
7979 return LowerINT_TO_FPVector(Op, DAG, dl);
7980
7981 // Conversions to f128 are legal.
7982 if (Op.getValueType() == MVT::f128)
7983 return Subtarget.hasP9Vector() ? Op : SDValue();
7984
7985 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
7986 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
7987 return SDValue();
7988
7989 if (Src.getValueType() == MVT::i1) {
7990 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
7991 DAG.getConstantFP(1.0, dl, Op.getValueType()),
7992 DAG.getConstantFP(0.0, dl, Op.getValueType()));
7993 if (IsStrict)
7994 return DAG.getMergeValues({Sel, Chain}, dl);
7995 else
7996 return Sel;
7997 }
7998
7999 // If we have direct moves, we can do all the conversion, skip the store/load
8000 // however, without FPCVT we can't do most conversions.
8001 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8002 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8003 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8004
8005 assert((IsSigned || Subtarget.hasFPCVT()) &&
8006 "UINT_TO_FP is supported only with FPCVT");
8007
8008 if (Src.getValueType() == MVT::i64) {
8009 SDValue SINT = Src;
8010 // When converting to single-precision, we actually need to convert
8011 // to double-precision first and then round to single-precision.
8012 // To avoid double-rounding effects during that operation, we have
8013 // to prepare the input operand. Bits that might be truncated when
8014 // converting to double-precision are replaced by a bit that won't
8015 // be lost at this stage, but is below the single-precision rounding
8016 // position.
8017 //
8018 // However, if -enable-unsafe-fp-math is in effect, accept double
8019 // rounding to avoid the extra overhead.
8020 if (Op.getValueType() == MVT::f32 &&
8021 !Subtarget.hasFPCVT() &&
8023
8024 // Twiddle input to make sure the low 11 bits are zero. (If this
8025 // is the case, we are guaranteed the value will fit into the 53 bit
8026 // mantissa of an IEEE double-precision value without rounding.)
8027 // If any of those low 11 bits were not zero originally, make sure
8028 // bit 12 (value 2048) is set instead, so that the final rounding
8029 // to single-precision gets the correct result.
8031 SINT, DAG.getConstant(2047, dl, MVT::i64));
8032 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8033 Round, DAG.getConstant(2047, dl, MVT::i64));
8034 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8035 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8036 Round, DAG.getConstant(-2048, dl, MVT::i64));
8037
8038 // However, we cannot use that value unconditionally: if the magnitude
8039 // of the input value is small, the bit-twiddling we did above might
8040 // end up visibly changing the output. Fortunately, in that case, we
8041 // don't need to twiddle bits since the original input will convert
8042 // exactly to double-precision floating-point already. Therefore,
8043 // construct a conditional to use the original value if the top 11
8044 // bits are all sign-bit copies, and use the rounded value computed
8045 // above otherwise.
8047 SINT, DAG.getConstant(53, dl, MVT::i32));
8048 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8049 Cond, DAG.getConstant(1, dl, MVT::i64));
8050 Cond = DAG.getSetCC(
8051 dl,
8053 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8054
8056 }
8057
8058 ReuseLoadInfo RLI;
8059 SDValue Bits;
8060
8062 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8063 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8064 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8065 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8066 } else if (Subtarget.hasLFIWAX() &&
8067 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8068 MachineMemOperand *MMO =
8070 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8071 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8074 Ops, MVT::i32, MMO);
8075 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8076 } else if (Subtarget.hasFPCVT() &&
8077 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8078 MachineMemOperand *MMO =
8080 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8081 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8084 Ops, MVT::i32, MMO);
8085 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8086 } else if (((Subtarget.hasLFIWAX() &&
8087 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8088 (Subtarget.hasFPCVT() &&
8089 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8090 SINT.getOperand(0).getValueType() == MVT::i32) {
8091 MachineFrameInfo &MFI = MF.getFrameInfo();
8093
8094 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8095 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8096
8097 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8099 DAG.getMachineFunction(), FrameIdx));
8100 Chain = Store;
8101
8102 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8103 "Expected an i32 store");
8104
8105 RLI.Ptr = FIdx;
8106 RLI.Chain = Chain;
8107 RLI.MPI =
8109 RLI.Alignment = Align(4);
8110
8111 MachineMemOperand *MMO =
8113 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8114 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8115 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8116 PPCISD::LFIWZX : PPCISD::LFIWAX,
8117 dl, DAG.getVTList(MVT::f64, MVT::Other),
8118 Ops, MVT::i32, MMO);
8119 Chain = Bits.getValue(1);
8120 } else
8121 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8122
8123 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8124 if (IsStrict)
8125 Chain = FP.getValue(1);
8126
8127 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8128 if (IsStrict)
8131 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8132 else
8133 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8134 DAG.getIntPtrConstant(0, dl));
8135 }
8136 return FP;
8137 }
8138
8139 assert(Src.getValueType() == MVT::i32 &&
8140 "Unhandled INT_TO_FP type in custom expander!");
8141 // Since we only generate this in 64-bit mode, we can take advantage of
8142 // 64-bit registers. In particular, sign extend the input value into the
8143 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8144 // then lfd it and fcfid it.
8146 MachineFrameInfo &MFI = MF.getFrameInfo();
8148
8149 SDValue Ld;
8150 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8151 ReuseLoadInfo RLI;
8152 bool ReusingLoad;
8153 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8154 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8155 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8156
8157 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8159 DAG.getMachineFunction(), FrameIdx));
8160 Chain = Store;
8161
8162 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8163 "Expected an i32 store");
8164
8165 RLI.Ptr = FIdx;
8166 RLI.Chain = Chain;
8167 RLI.MPI =
8169 RLI.Alignment = Align(4);
8170 }
8171
8172 MachineMemOperand *MMO =
8174 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8175 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8176 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8177 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8178 MVT::i32, MMO);
8179 Chain = Ld.getValue(1);
8180 if (ReusingLoad)
8181 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8182 } else {
8183 assert(Subtarget.isPPC64() &&
8184 "i32->FP without LFIWAX supported only on PPC64");
8185
8186 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8187 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8188
8190
8191 // STD the extended value into the stack slot.
8192 SDValue Store = DAG.getStore(
8193 Chain, dl, Ext64, FIdx,
8195 Chain = Store;
8196
8197 // Load the value as a double.
8198 Ld = DAG.getLoad(
8199 MVT::f64, dl, Chain, FIdx,
8201 Chain = Ld.getValue(1);
8202 }
8203
8204 // FCFID it and return it.
8205 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8206 if (IsStrict)
8207 Chain = FP.getValue(1);
8208 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8209 if (IsStrict)
8212 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8213 else
8214 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8215 DAG.getIntPtrConstant(0, dl));
8216 }
8217 return FP;
8218}
8219
/// Custom lowering for FLT_ROUNDS_. Reads the FPSCR through an MFFS node,
/// isolates its two low-order rounding-mode bits and converts them to the
/// FLT_ROUNDS numbering described in the block comment below. The returned
/// value merges the converted rounding mode with the updated chain.
// NOTE(review): MF and PtrVT are referenced below, but no declaration for
// either is visible in this listing (the embedded numbering skips 8242 and
// 8244) - confirm against the upstream file.
8220SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8221 SelectionDAG &DAG) const {
8222 SDLoc dl(Op);
8223 /*
8224 The rounding mode is in bits 30:31 of FPSCR, and has the following
8225 settings:
8226 00 Round to nearest
8227 01 Round to 0
8228 10 Round to +inf
8229 11 Round to -inf
8230
8231 FLT_ROUNDS, on the other hand, expects the following:
8232 -1 Undefined
8233 0 Round to 0
8234 1 Round to nearest
8235 2 Round to +inf
8236 3 Round to -inf
8237
8238 To perform the conversion, we do:
8239 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8240 */
8241
8243 EVT VT = Op.getValueType();
8245
8246 // Save FP Control Word to register
8247 SDValue Chain = Op.getOperand(0);
8248 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8249 Chain = MFFS.getValue(1);
8250
  // Obtain the control word as an i32: bitcast + truncate when i64 is a legal
  // type, otherwise spill the f64 to a stack slot and reload one 32-bit word.
8251 SDValue CWD;
8252 if (isTypeLegal(MVT::i64)) {
8253 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8254 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8255 } else {
8256 // Save FP register to stack slot
8257 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8258 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8259 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8260
8261 // Load FP Control Word from low 32 bits of stack slot.
  // NOTE(review): the next line is the message operand of an assert whose
  // opening line (8262) is missing from this listing.
8263 "Stack slot adjustment is valid only on big endian subtargets!");
  // The low word sits at byte offset 4 within the 8-byte slot.
8264 SDValue Four = DAG.getConstant(4, dl, PtrVT);
8265 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8266 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8267 Chain = CWD.getValue(1);
8268 }
8269
8270 // Transform as necessary
  // CWD1 = CWD & 3 and CWD2 = ((CWD ^ 3) & 3) >> 1; their XOR maps the
  // two-bit mode 0 -> 1, 1 -> 0, 2 -> 2, 3 -> 3.
8271 SDValue CWD1 =
8272 DAG.getNode(ISD::AND, dl, MVT::i32,
8273 CWD, DAG.getConstant(3, dl, MVT::i32));
8274 SDValue CWD2 =
8275 DAG.getNode(ISD::SRL, dl, MVT::i32,
8276 DAG.getNode(ISD::AND, dl, MVT::i32,
8277 DAG.getNode(ISD::XOR, dl, MVT::i32,
8278 CWD, DAG.getConstant(3, dl, MVT::i32)),
8279 DAG.getConstant(3, dl, MVT::i32)),
8280 DAG.getConstant(1, dl, MVT::i32));
8281
8282 SDValue RetVal =
8283 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8284
  // Convert the i32 result to the node's result type: TRUNCATE when VT is
  // narrower than 16 bits, ZERO_EXTEND otherwise.
8285 RetVal =
8286 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8287 dl, VT, RetVal);
8288
  // Hand back both the value and the chain threaded through MFFS (and through
  // the stack store/load when that path was taken).
8289 return DAG.getMergeValues({RetVal, Chain}, dl);
8290}
8291
8292SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8293 EVT VT = Op.getValueType();
8294 unsigned BitWidth = VT.getSizeInBits();
8295 SDLoc dl(Op);
8296 assert(Op.getNumOperands() == 3 &&
8297 VT == Op.getOperand(1).getValueType() &&
8298 "Unexpected SHL!");
8299
8300 // Expand into a bunch of logical ops. Note that these ops
8301 // depend on the PPC behavior for oversized shift amounts.
8302 SDValue Lo = Op.getOperand(0);
8303 SDValue Hi = Op.getOperand(1);
8304 SDValue Amt = Op.getOperand(2);
8305 EVT AmtVT = Amt.getValueType();
8306
8307 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8308 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8309 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8310 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8311 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8312 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8313 DAG.getConstant(-BitWidth, dl, AmtVT));
8314 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8315 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8316 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8317 SDValue OutOps[] = { OutLo, OutHi };
8318 return DAG.getMergeValues(OutOps, dl);
8319}
8320
8321SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8322 EVT VT = Op.getValueType();
8323 SDLoc dl(Op);
8324 unsigned BitWidth = VT.getSizeInBits();
8325 assert(Op.getNumOperands() == 3 &&
8326 VT == Op.getOperand(1).getValueType() &&
8327 "Unexpected SRL!");
8328
8329 // Expand into a bunch of logical ops. Note that these ops
8330 // depend on the PPC behavior for oversized shift amounts.
8331 SDValue Lo = Op.getOperand(0);
8332 SDValue Hi = Op.getOperand(1);
8333 SDValue Amt = Op.getOperand(2);
8334 EVT AmtVT = Amt.getValueType();
8335
8336 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8337 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8338 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8339 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8340 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8341 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8342 DAG.getConstant(-BitWidth, dl, AmtVT));
8343 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8344 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8345 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8346 SDValue OutOps[] = { OutLo, OutHi };
8347 return DAG.getMergeValues(OutOps, dl);
8348}
8349
8350SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8351 SDLoc dl(Op);
8352 EVT VT = Op.getValueType();
8353 unsigned BitWidth = VT.getSizeInBits();
8354 assert(Op.getNumOperands() == 3 &&
8355 VT == Op.getOperand(1).getValueType() &&
8356 "Unexpected SRA!");
8357
8358 // Expand into a bunch of logical ops, followed by a select_cc.
8359 SDValue Lo = Op.getOperand(0);
8360 SDValue Hi = Op.getOperand(1);
8361 SDValue Amt = Op.getOperand(2);
8362 EVT AmtVT = Amt.getValueType();
8363
8364 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8365 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8366 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8367 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8368 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8369 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8370 DAG.getConstant(-BitWidth, dl, AmtVT));
8371 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8372 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8373 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8374 Tmp4, Tmp6, ISD::SETLE);
8375 SDValue OutOps[] = { OutLo, OutHi };
8376 return DAG.getMergeValues(OutOps, dl);
8377}
8378
8379SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8380 SelectionDAG &DAG) const {
8381 SDLoc dl(Op);
8382 EVT VT = Op.getValueType();
8383 unsigned BitWidth = VT.getSizeInBits();
8384
8385 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8386 SDValue X = Op.getOperand(0);
8387 SDValue Y = Op.getOperand(1);
8388 SDValue Z = Op.getOperand(2);
8389 EVT AmtVT = Z.getValueType();
8390
8391 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8392 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8393 // This is simpler than TargetLowering::expandFunnelShift because we can rely
8394 // on PowerPC shift by BW being well defined.
8395 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8396 DAG.getConstant(BitWidth - 1, dl, AmtVT));
8397 SDValue SubZ =
8398 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8399 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8400 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8401 return DAG.getNode(ISD::OR, dl, VT, X, Y);
8402}
8403
8404//===----------------------------------------------------------------------===//
8405// Vector related lowering.
8406//
8407
8408/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8409/// element size of SplatSize. Cast the result to VT.
///
/// \param Val - the value to splat.
/// \param SplatSize - element size in bytes; it is used as a 1-based index
///        into VTys and as a byte count in the all-ones test below.
/// \param VT - type the result is bitcast to; MVT::Other selects the
///        canonical type for SplatSize instead.
/// \returns a constant of the canonical type, bitcast to the requested type.
// NOTE(review): the VTys initializer (8413) and the declaration of
// CanonicalVT (8424) are missing from this listing - confirm against the
// upstream file.
8410static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8411 SelectionDAG &DAG, const SDLoc &dl) {
8412 static const MVT VTys[] = { // canonical VT to use for each size.
8414 };
8415
8416 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8417
8418 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
  // NOTE(review): the shift count is SplatSize * 8, which would reach 64 for
  // an 8-byte splat; the callers visible in this listing pass 1, 2 or 4.
8419 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
8420 SplatSize = 1;
8421 Val = 0xFF;
8422 }
8423
8425
8426 // Build a canonical splat for this value.
8427 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8428}
8429
8430/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8431/// specified intrinsic ID.
8432static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8433 const SDLoc &dl, EVT DestVT = MVT::Other) {
8434 if (DestVT == MVT::Other) DestVT = Op.getValueType();
8435 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8436 DAG.getConstant(IID, dl, MVT::i32), Op);
8437}
8438
8439/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8440/// specified intrinsic ID.
8441static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8442 SelectionDAG &DAG, const SDLoc &dl,
8443 EVT DestVT = MVT::Other) {
8444 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8445 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8446 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8447}
8448
8449/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8450/// specified intrinsic ID.
8451static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8452 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8453 EVT DestVT = MVT::Other) {
8454 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8455 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8456 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8457}
8458
8459/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8460/// amount. The result has the specified value type.
8461static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8462 SelectionDAG &DAG, const SDLoc &dl) {
8463 // Force LHS/RHS to be the right type.
8464 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8465 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8466
8467 int Ops[16];
8468 for (unsigned i = 0; i != 16; ++i)
8469 Ops[i] = i + Amt;
8470 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8471 return DAG.getNode(ISD::BITCAST, dl, VT, T);
8472}
8473
8474/// Do we have an efficient pattern in a .td file for this node?
8475///
8476/// \param V - pointer to the BuildVectorSDNode being matched
8477/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8478///
8479/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8480/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8481/// the opposite is true (expansion is beneficial) are:
8482/// - The node builds a vector out of integers that are not 32 or 64-bits
8483/// - The node builds a vector out of constants
8484/// - The node is a "load-and-splat"
8485/// In all other cases, we will choose to keep the BUILD_VECTOR.
8487 bool HasDirectMove,
8488 bool HasP8Vector) {
8489 EVT VecVT = V->getValueType(0);
8490 bool RightType = VecVT == MVT::v2f64 ||
8491 (HasP8Vector && VecVT == MVT::v4f32) ||
8492 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8493 if (!RightType)
8494 return false;
8495
8496 bool IsSplat = true;
8497 bool IsLoad = false;
8498 SDValue Op0 = V->getOperand(0);
8499
8500 // This function is called in a block that confirms the node is not a constant
8501 // splat. So a constant BUILD_VECTOR here means the vector is built out of
8502 // different constants.
8503 if (V->isConstant())
8504 return false;
8505 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8506 if (V->getOperand(i).isUndef())
8507 return false;
8508 // We want to expand nodes that represent load-and-splat even if the
8509 // loaded value is a floating point truncation or conversion to int.
8510 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8511 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8512 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8513 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8514 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8515 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8516 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8517 IsLoad = true;
8518 // If the operands are different or the input is not a load and has more
8519 // uses than just this BV node, then it isn't a splat.
8520 if (V->getOperand(i) != Op0 ||
8521 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8522 IsSplat = false;
8523 }
8524 return !(IsSplat && IsLoad);
8525}
8526
8527// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8528SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8529
8530 SDLoc dl(Op);
8531 SDValue Op0 = Op->getOperand(0);
8532
8533 if ((Op.getValueType() != MVT::f128) ||
8534 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8535 (Op0.getOperand(0).getValueType() != MVT::i64) ||
8536 (Op0.getOperand(1).getValueType() != MVT::i64))
8537 return SDValue();
8538
8539 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8540 Op0.getOperand(1));
8541}
8542
/// Look through an optional BITCAST and an optional scalar-to-vector wrapper
/// on \p Op and return a pointer to the underlying SDValue when it is a LOAD
/// accepted by ISD::isNormalLoad; return nullptr otherwise.
// NOTE(review): this listing is missing lines 8548, 8549 and 8554. The second
// half of the `||` condition, any assignment to IsPermuted and the
// declaration of LD are therefore not visible - confirm against the upstream
// file.
8543static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
8544 const SDValue *InputLoad = &Op;
  // Step over a bitcast of the input.
8545 if (InputLoad->getOpcode() == ISD::BITCAST)
8546 InputLoad = &InputLoad->getOperand(0);
  // Step over a scalar-to-vector node.
8547 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
8550 InputLoad = &InputLoad->getOperand(0);
8551 }
  // Whatever remains must be a load.
8552 if (InputLoad->getOpcode() != ISD::LOAD)
8553 return nullptr;
8555 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
8556}
8557
8558// Convert the argument APFloat to a single precision APFloat if there is no
8559// loss in information during the conversion to single precision APFloat and the
8560// resulting number is not a denormal number. Return true if successful.
8571
8572// Bitcast the argument APInt to a double and convert it to a single precision
8573// APFloat, bitcast the APFloat to an APInt and assign it to the original
8574// argument if there is no loss in information during the conversion from
8575// double to single precision APFloat and the resulting number is not a denormal
8576// number. Return true if successful.
8578 double DpValue = ArgAPInt.bitsToDouble();
8581 if (Success)
8582 ArgAPInt = APFloatDp.bitcastToAPInt();
8583 return Success;
8584}
8585
8586// If this is a case we can't handle, return null and let the default
8587// expansion code take care of it. If we CAN select this case, and if it
8588// selects to a single instruction, return Op. Otherwise, if we can codegen
8589// this case more efficiently than a constant pool load, lower it to the
8590// sequence of ops that should be used.
//
// NOTE(review): this listing omits a number of source lines (the embedded
// numbering has gaps, e.g. 8594, 8598, 8614-8616, 8626, 8630, 8634, 8640,
// 8656, 8667, 8677, 8692, 8752, 8768, 8796, 8807, 8819, 8830, 8836, 8842).
// As a result BVN, APSplatBits, APSplatUndef, SplatNode, LD, LdSplt, Elt,
// OnesV, Res and T are used below without a visible declaration, and several
// conditions/calls appear truncated. Confirm against the upstream file before
// relying on any of those spots.
8591SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
8592 SelectionDAG &DAG) const {
8593 SDLoc dl(Op);
8595 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8596
8597 // Check if this is a splat of a constant value.
8599 unsigned SplatBitSize;
8600 bool HasAnyUndefs;
8601 bool BVNIsConstantSplat =
8602 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8603 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
8604
8605 // If it is a splat of a double, check if we can shrink it to a 32 bit
8606 // non-denormal float which when converted back to double gives us the same
8607 // double. This is to exploit the XXSPLTIDP instruction.
8608 // If we lose precision, we use XXSPLTI32DX.
8609 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
8610 Subtarget.hasPrefixInstrs()) {
8611 // Check the type first to short-circuit so we don't modify APSplatBits if
8612 // this block isn't executed.
8613 if ((Op->getValueType(0) == MVT::v2f64) &&
8617 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
8618 return DAG.getBitcast(Op.getValueType(), SplatNode);
8619 } else {
8620 // We may lose precision, so we have to use XXSPLTI32DX.
8621
      // Split the 64-bit splat value into its upper and lower 32-bit words.
8622 uint32_t Hi =
8623 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
8624 uint32_t Lo =
8625 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
8627
8628 if (!Hi || !Lo)
8629 // If either load is 0, then we should generate XXLXOR to set to 0.
8631
      // Each non-zero word gets its own node; the target constants 0 and 1
      // distinguish the Hi word from the Lo word.
8632 if (Hi)
8633 SplatNode = DAG.getNode(
8635 DAG.getTargetConstant(0, dl, MVT::i32),
8636 DAG.getTargetConstant(Hi, dl, MVT::i32));
8637
8638 if (Lo)
8639 SplatNode =
8641 DAG.getTargetConstant(1, dl, MVT::i32),
8642 DAG.getTargetConstant(Lo, dl, MVT::i32));
8643
8644 return DAG.getBitcast(Op.getValueType(), SplatNode);
8645 }
8646 }
8647
  // Everything that is not a constant splat of at most 32 bits is handled
  // (or rejected) inside this block; it always returns.
8648 if (!BVNIsConstantSplat || SplatBitSize > 32) {
8649
8650 bool IsPermutedLoad = false;
8651 const SDValue *InputLoad =
8652 getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
8653 // Handle load-and-splat patterns as we have instructions that will do this
8654 // in one go.
8655 if (InputLoad && DAG.isSplatValue(Op, true)) {
8657
8658 // We have handling for 4 and 8 byte elements.
8659 unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
8660
8661 // Checking for a single use of this load, we have to check for vector
8662 // width (128 bits) / ElementSize uses (since each operand of the
8663 // BUILD_VECTOR is a separate use of the value).
8664 unsigned NumUsesOfInputLD = 128 / ElementSize;
8665 for (SDValue BVInOp : Op->ops())
8666 if (BVInOp.isUndef())
8668 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
8669 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
8670 ((Subtarget.hasVSX() && ElementSize == 64) ||
8671 (Subtarget.hasP9Vector() && ElementSize == 32))) {
8672 SDValue Ops[] = {
8673 LD->getChain(), // Chain
8674 LD->getBasePtr(), // Ptr
8675 DAG.getValueType(Op.getValueType()) // VT
8676 };
8678 PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8679 Ops, LD->getMemoryVT(), LD->getMemOperand());
8680 // Replace all uses of the output chain of the original load with the
8681 // output chain of the new load.
8682 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
8683 LdSplt.getValue(1));
8684 return LdSplt;
8685 }
8686 }
8687
8688 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
8689 // 32-bits can be lowered to VSX instructions under certain conditions.
8690 // Without VSX, there is no pattern more efficient than expanding the node.
8691 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
8693 Subtarget.hasP8Vector()))
8694 return Op;
8695 return SDValue();
8696 }
8697
  // From here on the node is a constant splat of at most 32 bits.
8698 uint64_t SplatBits = APSplatBits.getZExtValue();
8699 uint64_t SplatUndef = APSplatUndef.getZExtValue();
8700 unsigned SplatSize = SplatBitSize / 8;
8701
8702 // First, handle single instruction cases.
8703
8704 // All zeros?
8705 if (SplatBits == 0) {
8706 // Canonicalize all zero vectors to be v4i32.
8707 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
8708 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
8709 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
8710 }
8711 return Op;
8712 }
8713
8714 // We have XXSPLTIW for constant splats four bytes wide.
8715 // Given vector length is a multiple of 4, 2-byte splats can be replaced
8716 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
8717 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
8718 // turned into a 4-byte splat of 0xABABABAB.
8719 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
8720 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
8721 Op.getValueType(), DAG, dl);
8722
8723 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
8724 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
8725 dl);
8726
8727 // We have XXSPLTIB for constant splats one byte wide.
8728 if (Subtarget.hasP9Vector() && SplatSize == 1)
8729 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
8730 dl);
8731
8732 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  // Sign-extend the SplatBitSize-wide value to 32 bits by shifting it to the
  // top of an int32_t and arithmetically shifting it back down.
8733 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
8734 (32-SplatBitSize));
8735 if (SextVal >= -16 && SextVal <= 15)
8736 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
8737 dl);
8738
8739 // Two instruction sequences.
8740
8741 // If this value is in the range [-32,30] and is even, use:
8742 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8743 // If this value is in the range [17,31] and is odd, use:
8744 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8745 // If this value is in the range [-31,-17] and is odd, use:
8746 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8747 // Note the last two are three-instruction sequences.
8748 if (SextVal >= -32 && SextVal <= 31) {
8749 // To avoid having these optimizations undone by constant folding,
8750 // we convert to a pseudo that will be expanded later into one of
8751 // the above forms.
8753 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
8754 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
8755 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
8756 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
8757 if (VT == Op.getValueType())
8758 return RetVal;
8759 else
8760 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
8761 }
8762
8763 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
8764 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
8765 // for fneg/fabs.
8766 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
8767 // Make -1 and vspltisw -1:
8769
8770 // Make the VSLW intrinsic, computing 0x8000_0000.
8771 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
8772 OnesV, DAG, dl);
8773
8774 // xor by OnesV to invert it.
8775 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
8776 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8777 }
8778
8779 // Check to see if this is a wide variety of vsplti*, binop self cases.
8780 static const signed char SplatCsts[] = {
8781 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8782 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8783 };
8784
8785 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
8786 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8787 // cases which are ambiguous (e.g. formation of 0x8000_0000).
8788 int i = SplatCsts[idx];
8789
8790 // Figure out what shift amount will be used by altivec if shifted by i in
8791 // this splat size.
8792 unsigned TypeShiftAmt = i & (SplatBitSize-1);
8793
    // In the three IIDs tables below, the entry is selected by SplatSize-1;
    // index 2 (a 3-byte splat) is a 0 placeholder.
8794 // vsplti + shl self.
8795 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
8797 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8798 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
8799 Intrinsic::ppc_altivec_vslw
8800 };
8801 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8802 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8803 }
8804
8805 // vsplti + srl self.
8806 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8808 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8809 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
8810 Intrinsic::ppc_altivec_vsrw
8811 };
8812 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8813 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8814 }
8815
8816 // vsplti + rol self.
8817 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
8818 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
8820 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8821 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
8822 Intrinsic::ppc_altivec_vrlw
8823 };
8824 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8825 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8826 }
8827
    // For the vsldoi forms below, little-endian subtargets use the
    // complementary byte amount (16 - n) instead of n.
8828 // t = vsplti c, result = vsldoi t, t, 1
8829 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
8831 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
8832 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8833 }
8834 // t = vsplti c, result = vsldoi t, t, 2
8835 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
8837 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
8838 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8839 }
8840 // t = vsplti c, result = vsldoi t, t, 3
8841 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
8843 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
8844 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8845 }
8846 }
8847
  // No cheap sequence was found; let the default expansion handle it.
8848 return SDValue();
8849}
8850
8851/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8852/// the specified operations to build the shuffle.
8854 SDValue RHS, SelectionDAG &DAG,
8855 const SDLoc &dl) {
8856 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8857 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8858 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8859
8860 enum {
8861 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8862 OP_VMRGHW,
8863 OP_VMRGLW,
8868 OP_VSLDOI4,
8869 OP_VSLDOI8,
8871 };
8872
8873 if (OpNum == OP_COPY) {
8874 if (LHSID == (1*9+2)*9+3) return LHS;
8875 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8876 return RHS;
8877 }
8878
8882
8883 int ShufIdxs[16];
8884 switch (OpNum) {
8885 default: llvm_unreachable("Unknown i32 permute!");
8886 case OP_VMRGHW:
8887 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
8888 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
8889 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
8890 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
8891 break;
8892 case OP_VMRGLW:
8893 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
8894 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
8895 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
8896 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
8897 break;
8898 case OP_VSPLTISW0:
8899 for (unsigned i = 0; i != 16; ++i)
8900 ShufIdxs[i] = (i&3)+0;
8901 break;
8902 case OP_VSPLTISW1:
8903 for (unsigned i = 0; i != 16; ++i)
8904 ShufIdxs[i] = (i&3)+4;
8905 break;
8906 case OP_VSPLTISW2:
8907 for (unsigned i = 0; i != 16; ++i)
8908 ShufIdxs[i] = (i&3)+8;
8909 break;
8910 case OP_VSPLTISW3:
8911 for (unsigned i = 0; i != 16; ++i)
8912 ShufIdxs[i] = (i&3)+12;
8913 break;
8914 case OP_VSLDOI4:
8915 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
8916 case OP_VSLDOI8:
8917 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
8918 case OP_VSLDOI12:
8919 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
8920 }
8921 EVT VT = OpLHS.getValueType();
8925 return DAG.getNode(ISD::BITCAST, dl, VT, T);
8926}
8927
8928/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8929/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8930/// SDValue.
    ///
    /// The 16-entry byte mask is scanned for one position whose source differs
    /// while every other position keeps its original order (taken from the other
    /// operand when both operands are defined). On a match the result is a
    /// PPCISD::VECINSERT node, fed by a PPCISD::VECSHL of the source vector when
    /// ShiftElts is non-zero.
    ///
    /// NOTE(review): this listing is lossy -- the embedded source line numbers
    /// skip 8993, 9000-9002, 9023 and 9026, so a few statements are not visible.
8931SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
8932 SelectionDAG &DAG) const {
8933 const unsigned BytesInVector = 16;
8934 bool IsLE = Subtarget.isLittleEndian();
8935 SDLoc dl(N);
8936 SDValue V1 = N->getOperand(0);
8937 SDValue V2 = N->getOperand(1);
8938 unsigned ShiftElts = 0, InsertAtByte = 0;
8939 bool Swap = false;
8940
8941 // Shifts required to get the byte we want at element 7.
     // Both tables have 16 entries, one per possible source byte.
8942 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
8943 0, 15, 14, 13, 12, 11, 10, 9};
8944 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
8945 1, 2, 3, 4, 5, 6, 7, 8};
8946
8947 ArrayRef<int> Mask = N->getMask();
8948 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
8949
8950 // For each mask element, find out if we're just inserting something
8951 // from V2 into V1 or vice versa.
8952 // Possible permutations inserting an element from V2 into V1:
8953 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8954 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8955 // ...
8956 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
8957 // Inserting from V1 into V2 will be similar, except mask range will be
8958 // [16,31].
8959
8960 bool FoundCandidate = false;
8961 // If both vector operands for the shuffle are the same vector, the mask
8962 // will contain only elements from the first one and the second one will be
8963 // undef. VINSERTBSrcElem is the only mask value accepted in that case.
8964 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
8965 // Go through the mask of bytes to find an element that's being moved
8966 // from one vector to the other.
8967 for (unsigned i = 0; i < BytesInVector; ++i) {
8968 unsigned CurrentElement = Mask[i];
8969 // If 2nd operand is undefined, we should only look for element
8970 // VINSERTBSrcElem (8 on little endian, 7 on big endian) in the Mask.
8971 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
8972 continue;
8973
8974 bool OtherElementsInOrder = true;
8975 // Examine the other elements in the Mask to see if they're in original
8976 // order.
8977 for (unsigned j = 0; j < BytesInVector; ++j) {
8978 if (j == i)
8979 continue;
8980 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
8981 // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
8982 // in which case we assume we're always picking from the 1st operand.
8983 int MaskOffset =
8984 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
8985 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
8986 OtherElementsInOrder = false;
8987 break;
8988 }
8989 }
8990 // If other elements are in original order, we record the number of shifts
8991 // we need to get the element we want into element 7. Also record which byte
8992 // in the vector we should insert into.
     // NOTE(review): the listing skips source line 8993 here. The brace closed
     // at 9007 has no visible opener, so a conditional (presumably testing
     // OtherElementsInOrder) is missing from this view.
8994 // If 2nd operand is undefined, we assume no shifts and no swapping.
8995 if (V2.isUndef()) {
8996 ShiftElts = 0;
8997 Swap = false;
8998 } else {
8999 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
     // NOTE(review): the listing skips source lines 9000-9002 here; the
     // statements that set ShiftElts (presumably from the shift tables above)
     // and Swap for the two-operand case are not visible.
9003 }
     // Little endian numbers the destination byte from the other end of the
     // vector.
9004 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9005 FoundCandidate = true;
9006 break;
9007 }
9008 }
9009
9010 if (!FoundCandidate)
9011 return SDValue();
9012
9013 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9014 // optionally with VECSHL if shift is required.
9015 if (Swap)
9016 std::swap(V1, V2);
9017 if (V2.isUndef())
9018 V2 = V1;
9019 if (ShiftElts) {
9020 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9021 DAG.getConstant(ShiftElts, dl, MVT::i32));
9022 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
     // NOTE(review): the listing skips source line 9023 (the final argument of
     // this call; presumably the InsertAtByte constant).
9024 }
9025 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
     // NOTE(review): the listing skips source line 9026 (the final argument of
     // this call; presumably the InsertAtByte constant).
9027}
9028
9029/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9030/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9031/// SDValue.
    ///
    /// The shuffle must be a half-word shuffle (isNByteElemShuffleMask(N, 2, 1)).
    /// The eight half-word indices are packed into one 32-bit value, four bits
    /// each, and compared against the expected in-order pattern with one nibble
    /// masked out at a time.
    ///
    /// NOTE(review): this listing is lossy -- the embedded source line numbers
    /// skip 9089, 9102, 9106, 9125, 9131-9132 and 9135-9137, so a few statements
    /// are not visible.
9032SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9033 SelectionDAG &DAG) const {
9034 const unsigned NumHalfWords = 8;
9035 const unsigned BytesInVector = NumHalfWords * 2;
9036 // Check that the shuffle is on half-words.
9037 if (!isNByteElemShuffleMask(N, 2, 1))
9038 return SDValue();
9039
9040 bool IsLE = Subtarget.isLittleEndian();
9041 SDLoc dl(N);
9042 SDValue V1 = N->getOperand(0);
9043 SDValue V2 = N->getOperand(1);
9044 unsigned ShiftElts = 0, InsertAtByte = 0;
9045 bool Swap = false;
9046
9047 // Shifts required to get the half-word we want at element 3.
9048 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9049 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9050
9051 uint32_t Mask = 0;
     // In-order half-word indices packed one per nibble: 0..7 and 8..15.
9052 uint32_t OriginalOrderLow = 0x1234567;
9053 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9054 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9055 // 32-bit space, only need 4-bit nibbles per element. Half-word i goes into
     // nibble (NumHalfWords - 1 - i), so half-word 0 is the most significant.
9056 for (unsigned i = 0; i < NumHalfWords; ++i) {
9057 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9058 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9059 }
9060
9061 // For each mask element, find out if we're just inserting something
9062 // from V2 into V1 or vice versa. Possible permutations inserting an element
9063 // from V2 into V1:
9064 // X, 1, 2, 3, 4, 5, 6, 7
9065 // 0, X, 2, 3, 4, 5, 6, 7
9066 // 0, 1, X, 3, 4, 5, 6, 7
9067 // 0, 1, 2, X, 4, 5, 6, 7
9068 // 0, 1, 2, 3, X, 5, 6, 7
9069 // 0, 1, 2, 3, 4, X, 6, 7
9070 // 0, 1, 2, 3, 4, 5, X, 7
9071 // 0, 1, 2, 3, 4, 5, 6, X
9072 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9073
9074 bool FoundCandidate = false;
9075 // Go through the mask of half-words to find an element that's being moved
9076 // from one vector to the other.
9077 for (unsigned i = 0; i < NumHalfWords; ++i) {
9078 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
     // MaskOneElt is the candidate half-word index; MaskOtherElts selects the
     // remaining seven nibbles for the in-order comparison.
9079 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9080 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9081 uint32_t TargetOrder = 0x0;
9082
9083 // If both vector operands for the shuffle are the same vector, the mask
9084 // will contain only elements from the first one and the second one will be
9085 // undef.
9086 if (V2.isUndef()) {
9087 ShiftElts = 0;
9088 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
     // NOTE(review): the listing skips source line 9089 here; presumably it
     // assigns TargetOrder for the single-operand case, otherwise the comparison
     // below would be against 0x0.
9090 Swap = false;
9091 // Skip if not the correct element or mask of other elements don't equal
9092 // to our expected order.
9093 if (MaskOneElt == VINSERTHSrcElem &&
9094 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9095 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9096 FoundCandidate = true;
9097 break;
9098 }
9099 } else { // If both operands are defined.
9100 // Target order is [8,15] if the current mask is between [0,7].
9101 TargetOrder =
     // NOTE(review): the listing skips source line 9102 (the right-hand side
     // of this assignment; presumably a choice between OriginalOrderHigh and
     // OriginalOrderLow).
9103 // Skip if mask of other elements don't equal our expected order.
9104 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9105 // We only need the last 3 bits for the number of shifts.
     // NOTE(review): the listing skips source line 9106 (the start of the
     // conditional expression continued on the next line; presumably the
     // ShiftElts assignment with the little-endian table).
9107 : BigEndianShifts[MaskOneElt & 0x7];
9108 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
     // Swap the operands when the inserted half-word comes from V1.
9109 Swap = MaskOneElt < NumHalfWords;
9110 FoundCandidate = true;
9111 break;
9112 }
9113 }
9114 }
9115
9116 if (!FoundCandidate)
9117 return SDValue();
9118
9119 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9120 // optionally with VECSHL if shift is required.
9121 if (Swap)
9122 std::swap(V1, V2);
9123 if (V2.isUndef())
9124 V2 = V1;
     // NOTE(review): the listing skips source line 9125 here.
9126 if (ShiftElts) {
9127 // Double ShiftElts because we're left shifting on v16i8 type.
9128 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9129 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9130 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
     // NOTE(review): the listing skips source lines 9131-9132 here; the
     // definition of Ins is not visible.
9133 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9134 }
     // NOTE(review): the listing skips source lines 9135-9137 here; the
     // definition of Ins for the no-shift path is not visible.
9138 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9139}
9140
9141/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9142/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9143/// return the default SDValue.
    ///
    /// Requirements checked in the visible code: the shuffle works on 4-byte
    /// elements, one operand is a constant splat of at most 32 bits, and the mask
    /// keeps either words 0 and 2 or words 1 and 3 of the other operand.
    ///
    /// NOTE(review): this listing is lossy -- the embedded source line numbers
    /// skip 9152, 9162, 9167, 9172, 9206-9207 and 9209, so the definitions of
    /// VecShuffle, BVN, APSplatValue and APSplatUndef and the final node
    /// construction are not visible.
9144SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9145 SelectionDAG &DAG) const {
9146 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9147 // to v16i8. Peek through the bitcasts to get the actual operands.
9148 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9149 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9150
9151 auto ShuffleMask = SVN->getMask();
     // NOTE(review): the listing skips source line 9152 here.
9153 SDLoc DL(SVN);
9154
9155 // Check that we have a four byte shuffle.
9156 if (!isNByteElemShuffleMask(SVN, 4, 1))
9157 return SDValue();
9158
9159 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9160 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9161 std::swap(LHS, RHS);
     // NOTE(review): the listing skips source line 9162 here; presumably it
     // produces VecShuffle as the commuted shuffle so the mask read below
     // matches the swapped operands.
9163 ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9164 }
9165
9166 // Ensure that the RHS is a vector of constants.
     // NOTE(review): the listing skips source line 9167 (definition of BVN).
9168 if (!BVN)
9169 return SDValue();
9170
9171 // Check if RHS is a splat of 4-bytes (or smaller).
     // NOTE(review): the listing skips source line 9172 (declaration of
     // APSplatValue and APSplatUndef).
9173 unsigned SplatBitSize;
9174 bool HasAnyUndefs;
9175 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9176 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9177 SplatBitSize > 32)
9178 return SDValue();
9179
9180 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9181 // The instruction splats a constant C into two words of the source vector
9182 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9183 // Thus we check that the shuffle mask is the equivalent of
9184 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9185 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9186 // within each word are consecutive, so we only need to check the first byte.
     // ShuffleMask is indexed in bytes here: entries 0, 4, 8 and 12 are the
     // first bytes of words 0-3, and a value above 15 selects from the RHS.
9187 SDValue Index;
9188 bool IsLE = Subtarget.isLittleEndian();
9189 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9190 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9191 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9192 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9193 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9194 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9195 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9196 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9197 else
9198 return SDValue();
9199
9200 // If the splat is narrower than 32-bits, we need to get the 32-bit value
9201 // for XXSPLTI32DX. Each iteration doubles the populated width by OR-ing in
     // a shifted copy of the value.
9202 unsigned SplatVal = APSplatValue.getZExtValue();
9203 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9204 SplatVal |= (SplatVal << SplatBitSize);
9205
     // NOTE(review): the listing skips source lines 9206-9207 here (the start
     // of the node construction that takes Index and SplatVal as operands).
9208 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
     // NOTE(review): the listing skips source line 9209 (the return statement).
9210}
9211
9212/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9213/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9214/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9215/// i.e (or (shl x, C1), (srl x, 128-C1)).
    ///
    /// NOTE(review): this listing is lossy -- the embedded source line numbers
    /// skip 9225, 9229, 9233-9234, 9236 and 9238, so the declaration of Mask,
    /// the shuffle construction and the starts of the shl/srl/or nodes are not
    /// visible.
9216SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9217 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9218 assert(Op.getValueType() == MVT::v1i128 &&
9219 "Only set v1i128 as custom, other type shouldn't reach here!");
9220 SDLoc dl(Op);
9221 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9222 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
     // NOTE(review): reads operand 0 of N1 as a constant; no check that it is
     // one is visible here -- confirm the callers guarantee it.
9223 unsigned SHLAmt = N1.getConstantOperandVal(0);
9224 if (SHLAmt % 8 == 0) {
     // NOTE(review): the listing skips source line 9225 (declaration of Mask).
     // Build the identity byte order, then rotate it left by SHLAmt / 8 bytes.
9226 std::iota(Mask.begin(), Mask.end(), 0);
9227 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9228 if (SDValue Shuffle =
     // NOTE(review): the listing skips source line 9229 (start of the shuffle
     // construction whose trailing arguments follow).
9230 DAG.getUNDEF(MVT::v16i8), Mask))
9231 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9232 }
     // Fallback path. NOTE(review): the listing skips source lines 9233-9234,
     // 9236 and 9238; only the shift-amount arguments (SHLAmt and 128 - SHLAmt)
     // of the two shift nodes are visible, and OROp is defined on a missing line.
9235 DAG.getConstant(SHLAmt, dl, MVT::i32));
9237 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9239 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9240}
9241
9242/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9243/// is a shuffle we can handle in a single instruction, return it. Otherwise,
9244/// return the code it can be lowered into. Worst case, it can always be
9245/// lowered into a vperm.
    ///
    /// The patterns are tried in the order written and the first match returns,
    /// so the order of the checks below matters.
    ///
    /// NOTE(review): this listing is lossy -- the embedded source line numbers
    /// have many gaps (e.g. 9251, 9257, 9260, 9274-9280, 9292, 9303, 9313,
    /// 9322-9336, 9341, 9347, 9382-9396, 9403-9414, 9424-9427, 9436, 9500, 9532,
    /// 9545-9546). Definitions of SVOp, InputLoad, IsFourByte, LD, PFEntry,
    /// ResultMask and VPermMask, among others, are not visible.
9246SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9247 SelectionDAG &DAG) const {
9248 SDLoc dl(Op);
9249 SDValue V1 = Op.getOperand(0);
9250 SDValue V2 = Op.getOperand(1);
     // NOTE(review): the listing skips source line 9251 (definition of SVOp).
9252
9253 // Any nodes that were combined in the target-independent combiner prior
9254 // to vector legalization will not be sent to the target combine. Try to
9255 // combine it here.
9256 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
     // NOTE(review): the listing skips source line 9257 here. As shown, the
     // return looks unconditional and the statements after it unreachable, so a
     // guarding condition is presumably on the missing line.
9258 return NewShuffle;
9259 Op = NewShuffle;
     // NOTE(review): the listing skips source line 9260 here.
9261 V1 = Op.getOperand(0);
9262 V2 = Op.getOperand(1);
9263 }
9264 EVT VT = Op.getValueType();
9265 bool isLittleEndian = Subtarget.isLittleEndian();
9266
9267 unsigned ShiftElts, InsertAtByte;
9268 bool Swap = false;
9269
9270 // If this is a load-and-splat, we can do that with a single instruction
9271 // in some cases. However if the load has multiple uses, we don't want to
9272 // combine it because that will just produce multiple loads.
9273 bool IsPermutedLoad = false;
     // NOTE(review): the listing skips source lines 9274, 9276, 9278 and 9280
     // in the statements below (definition of InputLoad, part of the condition,
     // definition of IsFourByte and the SplatIdx initializer).
9275 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9277 InputLoad->hasOneUse()) {
9279 int SplatIdx =
9281
9282 // The splat index for permuted loads will be in the left half of the vector
9283 // which is strictly wider than the loaded value by 8 bytes. So we need to
9284 // adjust the splat index to point to the correct address in memory.
9285 if (IsPermutedLoad) {
9286 assert(isLittleEndian && "Unexpected permuted load on big endian target");
9287 SplatIdx += IsFourByte ? 2 : 1;
9288 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9289 "Splat of a value outside of the loaded memory");
9290 }
9291
     // NOTE(review): the listing skips source line 9292 (definition of LD).
9293 // For 4-byte load-and-splat, we need Power9.
9294 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
     // Byte offset of the splatted element from the load's base pointer;
     // little endian counts elements from the other end.
9295 uint64_t Offset = 0;
9296 if (IsFourByte)
9297 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9298 else
9299 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9300
9301 SDValue BasePtr = LD->getBasePtr();
9302 if (Offset != 0)
     // NOTE(review): the listing skips source line 9303 (start of the
     // statement that adds Offset to BasePtr).
9304 BasePtr, DAG.getIntPtrConstant(Offset, dl));
9305 SDValue Ops[] = {
9306 LD->getChain(), // Chain
9307 BasePtr, // BasePtr
9308 DAG.getValueType(Op.getValueType()) // VT
9309 };
9310 SDVTList VTL =
9311 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9312 SDValue LdSplt =
     // NOTE(review): the listing skips source line 9313 (start of the node
     // construction for LdSplt).
9314 Ops, LD->getMemoryVT(), LD->getMemOperand());
     // Redirect users of the original load's chain result to the new node.
9315 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9316 if (LdSplt.getValueType() != SVOp->getValueType(0))
9317 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9318 return LdSplt;
9319 }
9320 }
     // NOTE(review): the listing skips source lines 9322, 9326-9327, 9329,
     // 9331-9332 and 9335-9336 in the block below; the predicate that fills in
     // ShiftElts/InsertAtByte/Swap and the definition of Ins are not visible.
9321 if (Subtarget.hasP9Vector() &&
9323 isLittleEndian)) {
9324 if (Swap)
9325 std::swap(V1, V2);
9328 if (ShiftElts) {
9330 DAG.getConstant(ShiftElts, dl, MVT::i32));
9333 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9334 }
9337 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9338 }
9339
9340 if (Subtarget.hasPrefixInstrs()) {
     // NOTE(review): the listing skips source line 9341 (declaration of
     // SplatInsertNode).
9342 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9343 return SplatInsertNode;
9344 }
9345
9346 if (Subtarget.hasP9Altivec()) {
     // NOTE(review): the listing skips source line 9347 (declaration of
     // NewISDNode).
9348 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9349 return NewISDNode;
9350
9351 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9352 return NewISDNode;
9353 }
9354
9355 if (Subtarget.hasVSX() &&
9356 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9357 if (Swap)
9358 std::swap(V1, V2);
     // NOTE(review): the listing skips source lines 9359 and 9363 (first
     // operand conversion and the start of the node defining Shl).
9360 SDValue Conv2 =
9361 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9362
9364 DAG.getConstant(ShiftElts, dl, MVT::i32));
9365 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9366 }
9367
9368 if (Subtarget.hasVSX() &&
9369 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9370 if (Swap)
9371 std::swap(V1, V2);
     // NOTE(review): the listing skips source lines 9372 and 9376 (first
     // operand conversion and the start of the node defining PermDI).
9373 SDValue Conv2 =
9374 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9375
9377 DAG.getConstant(ShiftElts, dl, MVT::i32));
9378 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9379 }
9380
9381 if (Subtarget.hasP9Vector()) {
     // NOTE(review): the listing skips source lines 9382-9384, 9387-9388,
     // 9391-9392 and 9395-9396; the first condition of this if/else-if chain and
     // the definitions of ReveHWord, ReveWord, ReveDWord and ReveQWord are not
     // visible.
9385 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9386 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9389 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9390 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9393 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9394 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9397 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9398 }
9399 }
9400
9401 if (Subtarget.hasVSX()) {
9402 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
     // NOTE(review): the listing skips source lines 9403 and 9405
     // (definitions of SplatIdx and Conv).
9404
9406 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9407 DAG.getConstant(SplatIdx, dl, MVT::i32));
9408 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9409 }
9410
9411 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9412 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
     // NOTE(review): the listing skips source lines 9413-9414; the Swap
     // returned below is presumably a local SDValue declared there, not the
     // bool Swap declared at the top of the function.
9415 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
9416 }
9417 }
9418
9419 // Cases that are handled by instructions that take permute immediates
9420 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9421 // selected by the instruction selector.
9422 if (V2.isUndef()) {
     // NOTE(review): the listing skips source lines 9424-9427 and 9436, so
     // some predicates of this condition are not visible.
9423 if (PPC::isSplatShuffleMask(SVOp, 1) ||
9428 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
9429 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
9430 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
9431 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
9432 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
9433 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
9434 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
9435 (Subtarget.hasP8Altivec() && (
9437 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
9438 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
9439 return Op;
9440 }
9441 }
9442
9443 // Altivec has a variety of "shuffle immediates" that take two vector inputs
9444 // and produce a fixed permutation. If any of these match, do not lower to
9445 // VPERM.
     // ShuffleKind passed to the mask predicates is 2 on little endian and 0 on
     // big endian; the single-input checks above pass 1.
9446 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
9447 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9448 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9449 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
9450 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9451 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9452 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9453 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9454 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9455 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9456 (Subtarget.hasP8Altivec() && (
9457 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9458 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
9459 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
9460 return Op;
9461
9462 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
9463 // perfect shuffle table to emit an optimal matching sequence.
9464 ArrayRef<int> PermMask = SVOp->getMask();
9465
     // PFIndexes[i] is the source word (0-7) for result word i, or 8 when all
     // four of its bytes are undef.
9466 unsigned PFIndexes[4];
9467 bool isFourElementShuffle = true;
9468 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
9469 unsigned EltNo = 8; // Start out undef.
9470 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
9471 if (PermMask[i*4+j] < 0)
9472 continue; // Undef, ignore it.
9473
9474 unsigned ByteSource = PermMask[i*4+j];
     // Byte j of the result word must come from byte j of some source word.
9475 if ((ByteSource & 3) != j) {
9476 isFourElementShuffle = false;
9477 break;
9478 }
9479
     // All defined bytes of this result word must name the same source word.
9480 if (EltNo == 8) {
9481 EltNo = ByteSource/4;
9482 } else if (EltNo != ByteSource/4) {
9483 isFourElementShuffle = false;
9484 break;
9485 }
9486 }
9487 PFIndexes[i] = EltNo;
9488 }
9489
9490 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
9491 // perfect shuffle vector to determine if it is cost effective to do this as
9492 // discrete instructions, or whether we should use a vperm.
9493 // For now, we skip this for little endian until such time as we have a
9494 // little-endian perfect shuffle table.
9495 if (isFourElementShuffle && !isLittleEndian) {
9496 // Compute the index in the perfect shuffle table. The four indices are
     // treated as base-9 digits (0-7 for a source word, 8 for undef).
9497 unsigned PFTableIndex =
9498 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9499
     // NOTE(review): the listing skips source line 9500 (definition of
     // PFEntry, presumably the table lookup at PFTableIndex).
     // The cost is held in the top two bits of the entry.
9501 unsigned Cost = (PFEntry >> 30);
9502
9503 // Determining when to avoid vperm is tricky. Many things affect the cost
9504 // of vperm, particularly how many times the perm mask needs to be computed.
9505 // For example, if the perm mask can be hoisted out of a loop or is already
9506 // used (perhaps because there are multiple permutes with the same shuffle
9507 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
9508 // the loop requires an extra register.
9509 //
9510 // As a compromise, we only emit discrete instructions if the shuffle can be
9511 // generated in 3 or fewer operations. When we have loop information
9512 // available, if this block is within a loop, we should avoid using vperm
9513 // for 3-operation perms and use a constant pool load instead.
9514 if (Cost < 3)
9515 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9516 }
9517
9518 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
9519 // vector that will get spilled to the constant pool.
9520 if (V2.isUndef()) V2 = V1;
9521
9522 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
9523 // that it is in input element units, not in bytes. Convert now.
9524
9525 // For little endian, the order of the input vectors is reversed, and
9526 // the permutation mask is complemented with respect to 31. This is
9527 // necessary to produce proper semantics with the big-endian-biased vperm
9528 // instruction.
9529 EVT EltVT = V1.getValueType().getVectorElementType();
9530 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
9531
     // NOTE(review): the listing skips source line 9532 (declaration of
     // ResultMask).
9533 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
     // Undef mask entries are treated as element 0.
9534 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
9535
9536 for (unsigned j = 0; j != BytesPerElement; ++j)
9537 if (isLittleEndian)
9538 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
9539 dl, MVT::i32));
9540 else
9541 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
9542 MVT::i32));
9543 }
9544
     // NOTE(review): the listing skips source lines 9545-9546 (definition of
     // VPermMask, presumably built from ResultMask).
9547 LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
9548 LLVM_DEBUG(SVOp->dump());
9549 LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
9550 LLVM_DEBUG(VPermMask.dump());
9551
     // Little endian passes the two inputs in reversed order, matching the
     // complemented mask built above.
9552 if (isLittleEndian)
9553 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9554 V2, V1, VPermMask);
9555 else
9556 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9557 V1, V2, VPermMask);
9558}
9559
9560/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9561/// vector comparison. If it is, return true and fill in CompareOpc/isDot with
9562/// information about the intrinsic.
    ///
    /// Also returns false when the intrinsic is a comparison that the given
    /// Subtarget lacks the feature for (P8Altivec, P9Altivec, ISA 3.1 or VSX,
    /// depending on the case). CompareOpc is reset to -1 and isDot to false before
    /// the switch; isDot is set to true only for the predicate ("_p") intrinsics.
    ///
    /// NOTE(review): the CompareOpc values are hard-coded numbers, presumably the
    /// instructions' opcode fields -- confirm against the instruction definitions.
    /// NOTE(review): the listing skips source line 9563, which holds the start of
    /// the signature (function name and the leading parameters, including Intrin
    /// and CompareOpc).
9564 bool &isDot, const PPCSubtarget &Subtarget) {
     // Operand 0 of the intrinsic node is the intrinsic ID.
9565 unsigned IntrinsicID =
9566 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
9567 CompareOpc = -1;
9568 isDot = false;
9569 switch (IntrinsicID) {
9570 default:
9571 return false;
9572 // Comparison predicates.
9573 case Intrinsic::ppc_altivec_vcmpbfp_p:
9574 CompareOpc = 966;
9575 isDot = true;
9576 break;
9577 case Intrinsic::ppc_altivec_vcmpeqfp_p:
9578 CompareOpc = 198;
9579 isDot = true;
9580 break;
9581 case Intrinsic::ppc_altivec_vcmpequb_p:
9582 CompareOpc = 6;
9583 isDot = true;
9584 break;
9585 case Intrinsic::ppc_altivec_vcmpequh_p:
9586 CompareOpc = 70;
9587 isDot = true;
9588 break;
9589 case Intrinsic::ppc_altivec_vcmpequw_p:
9590 CompareOpc = 134;
9591 isDot = true;
9592 break;
9593 case Intrinsic::ppc_altivec_vcmpequd_p:
9594 if (Subtarget.hasP8Altivec()) {
9595 CompareOpc = 199;
9596 isDot = true;
9597 } else
9598 return false;
9599 break;
     // The not-equal predicate forms are only accepted with P9Altivec.
9600 case Intrinsic::ppc_altivec_vcmpneb_p:
9601 case Intrinsic::ppc_altivec_vcmpneh_p:
9602 case Intrinsic::ppc_altivec_vcmpnew_p:
9603 case Intrinsic::ppc_altivec_vcmpnezb_p:
9604 case Intrinsic::ppc_altivec_vcmpnezh_p:
9605 case Intrinsic::ppc_altivec_vcmpnezw_p:
9606 if (Subtarget.hasP9Altivec()) {
9607 switch (IntrinsicID) {
9608 default:
9609 llvm_unreachable("Unknown comparison intrinsic.");
9610 case Intrinsic::ppc_altivec_vcmpneb_p:
9611 CompareOpc = 7;
9612 break;
9613 case Intrinsic::ppc_altivec_vcmpneh_p:
9614 CompareOpc = 71;
9615 break;
9616 case Intrinsic::ppc_altivec_vcmpnew_p:
9617 CompareOpc = 135;
9618 break;
9619 case Intrinsic::ppc_altivec_vcmpnezb_p:
9620 CompareOpc = 263;
9621 break;
9622 case Intrinsic::ppc_altivec_vcmpnezh_p:
9623 CompareOpc = 327;
9624 break;
9625 case Intrinsic::ppc_altivec_vcmpnezw_p:
9626 CompareOpc = 391;
9627 break;
9628 }
9629 isDot = true;
9630 } else
9631 return false;
9632 break;
9633 case Intrinsic::ppc_altivec_vcmpgefp_p:
9634 CompareOpc = 454;
9635 isDot = true;
9636 break;
9637 case Intrinsic::ppc_altivec_vcmpgtfp_p:
9638 CompareOpc = 710;
9639 isDot = true;
9640 break;
9641 case Intrinsic::ppc_altivec_vcmpgtsb_p:
9642 CompareOpc = 774;
9643 isDot = true;
9644 break;
9645 case Intrinsic::ppc_altivec_vcmpgtsh_p:
9646 CompareOpc = 838;
9647 isDot = true;
9648 break;
9649 case Intrinsic::ppc_altivec_vcmpgtsw_p:
9650 CompareOpc = 902;
9651 isDot = true;
9652 break;
9653 case Intrinsic::ppc_altivec_vcmpgtsd_p:
9654 if (Subtarget.hasP8Altivec()) {
9655 CompareOpc = 967;
9656 isDot = true;
9657 } else
9658 return false;
9659 break;
9660 case Intrinsic::ppc_altivec_vcmpgtub_p:
9661 CompareOpc = 518;
9662 isDot = true;
9663 break;
9664 case Intrinsic::ppc_altivec_vcmpgtuh_p:
9665 CompareOpc = 582;
9666 isDot = true;
9667 break;
9668 case Intrinsic::ppc_altivec_vcmpgtuw_p:
9669 CompareOpc = 646;
9670 isDot = true;
9671 break;
9672 case Intrinsic::ppc_altivec_vcmpgtud_p:
9673 if (Subtarget.hasP8Altivec()) {
9674 CompareOpc = 711;
9675 isDot = true;
9676 } else
9677 return false;
9678 break;
9679
     // Quadword comparisons, non-predicate forms: ISA 3.1 only. isDot keeps
     // its initial value of false for these.
9680 case Intrinsic::ppc_altivec_vcmpequq:
9681 case Intrinsic::ppc_altivec_vcmpgtsq:
9682 case Intrinsic::ppc_altivec_vcmpgtuq:
9683 if (!Subtarget.isISA3_1())
9684 return false;
9685 switch (IntrinsicID) {
9686 default:
9687 llvm_unreachable("Unknown comparison intrinsic.");
9688 case Intrinsic::ppc_altivec_vcmpequq:
9689 CompareOpc = 455;
9690 break;
9691 case Intrinsic::ppc_altivec_vcmpgtsq:
9692 CompareOpc = 903;
9693 break;
9694 case Intrinsic::ppc_altivec_vcmpgtuq:
9695 CompareOpc = 647;
9696 break;
9697 }
9698 break;
9699
9700 // VSX predicate comparisons use the same infrastructure
9701 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9702 case Intrinsic::ppc_vsx_xvcmpgedp_p:
9703 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9704 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9705 case Intrinsic::ppc_vsx_xvcmpgesp_p:
9706 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9707 if (Subtarget.hasVSX()) {
     // This inner switch has no default; all six outer case labels are
     // handled explicitly.
9708 switch (IntrinsicID) {
9709 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9710 CompareOpc = 99;
9711 break;
9712 case Intrinsic::ppc_vsx_xvcmpgedp_p:
9713 CompareOpc = 115;
9714 break;
9715 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9716 CompareOpc = 107;
9717 break;
9718 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9719 CompareOpc = 67;
9720 break;
9721 case Intrinsic::ppc_vsx_xvcmpgesp_p:
9722 CompareOpc = 83;
9723 break;
9724 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9725 CompareOpc = 75;
9726 break;
9727 }
9728 isDot = true;
9729 } else
9730 return false;
9731 break;
9732
9733 // Normal Comparisons. These use the same CompareOpc values as the matching
     // predicate forms above but leave isDot false.
9734 case Intrinsic::ppc_altivec_vcmpbfp:
9735 CompareOpc = 966;
9736 break;
9737 case Intrinsic::ppc_altivec_vcmpeqfp:
9738 CompareOpc = 198;
9739 break;
9740 case Intrinsic::ppc_altivec_vcmpequb:
9741 CompareOpc = 6;
9742 break;
9743 case Intrinsic::ppc_altivec_vcmpequh:
9744 CompareOpc = 70;
9745 break;
9746 case Intrinsic::ppc_altivec_vcmpequw:
9747 CompareOpc = 134;
9748 break;
9749 case Intrinsic::ppc_altivec_vcmpequd:
9750 if (Subtarget.hasP8Altivec())
9751 CompareOpc = 199;
9752 else
9753 return false;
9754 break;
9755 case Intrinsic::ppc_altivec_vcmpneb:
9756 case Intrinsic::ppc_altivec_vcmpneh:
9757 case Intrinsic::ppc_altivec_vcmpnew:
9758 case Intrinsic::ppc_altivec_vcmpnezb:
9759 case Intrinsic::ppc_altivec_vcmpnezh:
9760 case Intrinsic::ppc_altivec_vcmpnezw:
9761 if (Subtarget.hasP9Altivec())
9762 switch (IntrinsicID) {
9763 default:
9764 llvm_unreachable("Unknown comparison intrinsic.");
9765 case Intrinsic::ppc_altivec_vcmpneb:
9766 CompareOpc = 7;
9767 break;
9768 case Intrinsic::ppc_altivec_vcmpneh:
9769 CompareOpc = 71;
9770 break;
9771 case Intrinsic::ppc_altivec_vcmpnew:
9772 CompareOpc = 135;
9773 break;
9774 case Intrinsic::ppc_altivec_vcmpnezb:
9775 CompareOpc = 263;
9776 break;
9777 case Intrinsic::ppc_altivec_vcmpnezh:
9778 CompareOpc = 327;
9779 break;
9780 case Intrinsic::ppc_altivec_vcmpnezw:
9781 CompareOpc = 391;
9782 break;
9783 }
9784 else
9785 return false;
9786 break;
9787 case Intrinsic::ppc_altivec_vcmpgefp:
9788 CompareOpc = 454;
9789 break;
9790 case Intrinsic::ppc_altivec_vcmpgtfp:
9791 CompareOpc = 710;
9792 break;
9793 case Intrinsic::ppc_altivec_vcmpgtsb:
9794 CompareOpc = 774;
9795 break;
9796 case Intrinsic::ppc_altivec_vcmpgtsh:
9797 CompareOpc = 838;
9798 break;
9799 case Intrinsic::ppc_altivec_vcmpgtsw:
9800 CompareOpc = 902;
9801 break;
9802 case Intrinsic::ppc_altivec_vcmpgtsd:
9803 if (Subtarget.hasP8Altivec())
9804 CompareOpc = 967;
9805 else
9806 return false;
9807 break;
9808 case Intrinsic::ppc_altivec_vcmpgtub:
9809 CompareOpc = 518;
9810 break;
9811 case Intrinsic::ppc_altivec_vcmpgtuh:
9812 CompareOpc = 582;
9813 break;
9814 case Intrinsic::ppc_altivec_vcmpgtuw:
9815 CompareOpc = 646;
9816 break;
9817 case Intrinsic::ppc_altivec_vcmpgtud:
9818 if (Subtarget.hasP8Altivec())
9819 CompareOpc = 711;
9820 else
9821 return false;
9822 break;
     // Quadword comparisons, predicate forms: ISA 3.1 only. Although they
     // follow the "Normal Comparisons" group, these set isDot.
9823 case Intrinsic::ppc_altivec_vcmpequq_p:
9824 case Intrinsic::ppc_altivec_vcmpgtsq_p:
9825 case Intrinsic::ppc_altivec_vcmpgtuq_p:
9826 if (!Subtarget.isISA3_1())
9827 return false;
9828 switch (IntrinsicID) {
9829 default:
9830 llvm_unreachable("Unknown comparison intrinsic.");
9831 case Intrinsic::ppc_altivec_vcmpequq_p:
9832 CompareOpc = 455;
9833 break;
9834 case Intrinsic::ppc_altivec_vcmpgtsq_p:
9835 CompareOpc = 903;
9836 break;
9837 case Intrinsic::ppc_altivec_vcmpgtuq_p:
9838 CompareOpc = 647;
9839 break;
9840 }
9841 isDot = true;
9842 break;
9843 }
9844 return true;
9845}
9846
9847/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9848/// lower, do it, otherwise return null.
    ///
    /// Handles llvm.thread.pointer, the MMA/VSX disassemble intrinsics, and the
    /// vector comparison intrinsics recognised by getVectorCompareInfo. Any other
    /// intrinsic yields an empty SDValue.
    ///
    /// NOTE(review): this listing is lossy -- the embedded source line numbers
    /// skip 9869, 9871, 9874 and 9903, so the declaration of RetOps and parts of
    /// two node constructions are not visible.
9849SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9850 SelectionDAG &DAG) const {
     // Operand 0 is the intrinsic ID.
9851 unsigned IntrinsicID =
9852 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9853
9854 SDLoc dl(Op);
9855
9856 switch (IntrinsicID) {
9857 case Intrinsic::thread_pointer:
9858 // Reads the thread pointer register, used for __builtin_thread_pointer.
9859 if (Subtarget.isPPC64())
9860 return DAG.getRegister(PPC::X13, MVT::i64);
9861 return DAG.getRegister(PPC::R2, MVT::i32);
9862
9863 case Intrinsic::ppc_mma_disassemble_acc:
9864 case Intrinsic::ppc_vsx_disassemble_pair: {
     // A pair is split into 2 vectors, an accumulator into 4.
9865 int NumVecs = 2;
9866 SDValue WideVec = Op.getOperand(1);
9867 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
9868 NumVecs = 4;
     // NOTE(review): the listing skips source line 9869 here.
9870 }
     // NOTE(review): the listing skips source line 9871 (declaration of
     // RetOps).
9872 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
9873 SDValue Extract = DAG.getNode(
     // NOTE(review): the listing skips source line 9874 (the leading
     // arguments of this node). The index below is reversed on little endian.
9875 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
9876 : VecNo,
9877 dl, MVT::i64));
9878 RetOps.push_back(Extract);
9879 }
9880 return DAG.getMergeValues(RetOps, dl);
9881 }
9882 }
9883
9884 // If this is a lowered altivec predicate compare, CompareOpc is set to the
9885 // opcode number of the comparison.
9886 int CompareOpc;
9887 bool isDot;
9888 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
9889 return SDValue(); // Don't custom lower most intrinsics.
9890
9891 // If this is a non-dot comparison, make the VCMP node and we are done.
     // The two compared vectors are operands 1 and 2 in this form.
9892 if (!isDot) {
9893 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
9894 Op.getOperand(1), Op.getOperand(2),
9895 DAG.getConstant(CompareOpc, dl, MVT::i32));
9896 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
9897 }
9898
9899 // Create the PPCISD altivec 'dot' comparison node. In this form operand 1
     // selects which CR6 bit to return (see the switch below) and the compared
     // vectors are operands 2 and 3.
9900 SDValue Ops[] = {
9901 Op.getOperand(2), // LHS
9902 Op.getOperand(3), // RHS
     // NOTE(review): the listing skips source line 9903 (the last element of
     // Ops; presumably the CompareOpc constant).
9904 };
9905 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
9906 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
9907
9908 // Now that we have the comparison, emit a copy from the CR to a GPR.
9909 // This is flagged to the above dot comparison.
9910 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
9911 DAG.getRegister(PPC::CR6, MVT::i32),
9912 CompNode.getValue(1));
9913
9914 // Unpack the result based on how the target uses it.
9915 unsigned BitNo; // Bit # of CR6.
9916 bool InvertBit; // Invert result?
9917 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
9918 default: // Can't happen, don't crash on invalid number though.
9919 case 0: // Return the value of the EQ bit of CR6.
9920 BitNo = 0; InvertBit = false;
9921 break;
9922 case 1: // Return the inverted value of the EQ bit of CR6.
9923 BitNo = 0; InvertBit = true;
9924 break;
9925 case 2: // Return the value of the LT bit of CR6.
9926 BitNo = 2; InvertBit = false;
9927 break;
9928 case 3: // Return the inverted value of the LT bit of CR6.
9929 BitNo = 2; InvertBit = true;
9930 break;
9931 }
9932
9933 // Shift the bit into the low position. The shift amount works out to 5 for
     // BitNo 0 and 7 for BitNo 2.
9934 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
9935 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
9936 // Isolate the bit.
9937 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
9938 DAG.getConstant(1, dl, MVT::i32));
9939
9940 // If we are supposed to, toggle the bit.
9941 if (InvertBit)
9942 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
9943 DAG.getConstant(1, dl, MVT::i32));
9944 return Flags;
9945}
9946
// Custom lowering for INTRINSIC_VOID nodes.
// Only llvm.ppc.cfence is handled here: it becomes a PPC::CFENCE8 machine
// node. Every other intrinsic ID falls through and yields an empty SDValue.
// NOTE(review): the listing skips source line 9958, so the getMachineNode
// call below is shown with one line missing.
9947SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9948 SelectionDAG &DAG) const {
9949 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
9950 // the beginning of the argument list.
     // ArgStart is the index of the intrinsic-ID operand: 0 when operand 0 is
     // already a constant, 1 when something else precedes it.
9951 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
9952 SDLoc DL(Op);
9953 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
9954 case Intrinsic::ppc_cfence: {
9955 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
9956 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
9957 return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
9959 Op.getOperand(ArgStart + 1)),
9960 Op.getOperand(0)),
9961 0);
9962 }
9963 default:
9964 break;
9965 }
     // Not an intrinsic handled above.
9966 return SDValue();
9967}
9968
9969// Lower scalar BSWAP64 to xxbrd.
// The scalar operand is duplicated into both lanes of a v2i64 BUILD_VECTOR,
// the vector is byte-swapped with ISD::BSWAP, and one lane is then read back
// (lane 1 on little-endian subtargets, lane 0 otherwise).
// NOTE(review): the listing skips source line 9981, which presumably starts
// the element-extract node that line 9982 finishes -- confirm against the
// full source.
9970SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
9971 SDLoc dl(Op);
9972 // MTVSRDD
9973 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
9974 Op.getOperand(0));
9975 // XXBRD
9976 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
9977 // MFVSRD
9978 int VectorIndex = 0;
9979 if (Subtarget.isLittleEndian())
9980 VectorIndex = 1;
9982 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
9983 return Op;
9984}
9985
9986// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
9987// compared to a value that is atomically loaded (atomic loads zero-extend).
// Nodes whose memory type is 32 bits or wider are returned unchanged. For
// narrower ones the compare operand (operand 2) is masked down to the memory
// width and the node is rebuilt with that operand substituted.
// NOTE(review): the listing skips source lines 10001, 10006, 10011, 10016 and
// 10018, so the declarations of NewCmpOp, Ops, Tys and the value of NodeTy
// are not visible here.
9988SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
9989 SelectionDAG &DAG) const {
9990 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
9991 "Expecting an atomic compare-and-swap here.");
9992 SDLoc dl(Op);
9993 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
9994 EVT MemVT = AtomicNode->getMemoryVT();
     // Nothing to fix up for word-sized or larger accesses.
9995 if (MemVT.getSizeInBits() >= 32)
9996 return Op;
9997
9998 SDValue CmpOp = Op.getOperand(2);
9999 // If this is already correctly zero-extended, leave it alone.
      // HighBits covers the bits of a 32-bit value above the memory width.
10000 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10002 return Op;
10003
10004 // Clear the high bits of the compare operand.
      // MaskVal has exactly the low MemVT.getSizeInBits() bits set.
10005 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10007 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10008 DAG.getConstant(MaskVal, dl, MVT::i32));
10009
10010 // Replace the existing compare operand with the properly zero-extended one.
10012 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10013 Ops.push_back(AtomicNode->getOperand(i));
10014 Ops[2] = NewCmpOp;
10015 MachineMemOperand *MMO = AtomicNode->getMemOperand();
10017 auto NodeTy =
10019 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10020}
10021
// Lowers SCALAR_TO_VECTOR through memory: the scalar operand is stored to a
// fresh 16-byte, 16-byte-aligned stack object and the result is loaded back
// from the same frame index using the node's vector result type.
// NOTE(review): the listing skips source lines 10026, 10028 and 10033, so the
// definitions of MFI and PtrVT and the tail of the getStore call are not
// visible here.
10022SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10023 SelectionDAG &DAG) const {
10024 SDLoc dl(Op);
10025 // Create a stack slot that is 16-byte aligned.
10027 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10029 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10030
10031 // Store the input value into Value#0 of the stack slot.
10032 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10034 // Load it out.
      // The load is chained on the store so it observes the stored value.
10035 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10036}
10037
// Custom lowering for INSERT_VECTOR_ELT.
// Returns an empty SDValue when the index operand is not a constant. For
// v8i16 and v16i8 results the new element goes through a PPCISD::MTVSRZ node
// and is inserted with PPCISD::VECINSERT. Any other type is returned as-is.
// NOTE(review): the listing skips source lines 10055, 10057, 10059 and 10062;
// the little-endian adjustment and the final VECINSERT operand are therefore
// not visible here.
10038SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10039 SelectionDAG &DAG) const {
10040 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10041 "Should only be called for ISD::INSERT_VECTOR_ELT");
10042
10043 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10044 // We have legal lowering for constant indices but not for variable ones.
10045 if (!C)
10046 return SDValue();
10047
10048 EVT VT = Op.getValueType();
10049 SDLoc dl(Op);
      // V1 is the vector being updated, V2 the value to insert.
10050 SDValue V1 = Op.getOperand(0);
10051 SDValue V2 = Op.getOperand(1);
10052 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10053 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10054 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10056 unsigned InsertAtElement = C->getZExtValue();
10058 if (Subtarget.isLittleEndian()) {
10060 }
10061 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10063 }
10064 return Op;
10065}
10066
// Custom lowering for loads of the v256i1 / v512i1 types.
// Loads of any other type are returned unchanged. Otherwise the access is
// split into VT.getSizeInBits() / 128 separate v16i8 loads at consecutive
// 16-byte offsets, the pieces are put in reverse order on little-endian
// subtargets, and they are combined with PPCISD::PAIR_BUILD (v256i1) or
// PPCISD::ACC_BUILD (v512i1). The result merges that value with TF.
// NOTE(review): the listing skips source lines 10086, 10087 and 10104, so the
// declarations of Loads, LoadChains and TF are not visible; TF is presumably
// a token factor over LoadChains -- confirm against the full source.
10067SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10068 SelectionDAG &DAG) const {
10069 SDLoc dl(Op);
10070 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10071 SDValue LoadChain = LN->getChain();
10072 SDValue BasePtr = LN->getBasePtr();
10073 EVT VT = Op.getValueType();
10074
10075 if (VT != MVT::v256i1 && VT != MVT::v512i1)
10076 return Op;
10077
10078 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10079 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10080 // 2 or 4 vsx registers.
10081 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10082 "Type unsupported without MMA");
10083 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10084 "Type unsupported without paired vector support");
10085 Align Alignment = LN->getAlign();
10088 unsigned NumVecs = VT.getSizeInBits() / 128;
10089 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
      // Every piece is chained on the original load's chain; the pointer info
      // and alignment are adjusted for the Idx * 16 byte offset.
10090 SDValue Load =
10091 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10092 LN->getPointerInfo().getWithOffset(Idx * 16),
10093 commonAlignment(Alignment, Idx * 16),
10094 LN->getMemOperand()->getFlags(), LN->getAAInfo());
      // Step the address to the next 16-byte piece.
10095 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10096 DAG.getConstant(16, dl, BasePtr.getValueType()));
10097 Loads.push_back(Load);
10098 LoadChains.push_back(Load.getValue(1));
10099 }
10100 if (Subtarget.isLittleEndian()) {
10101 std::reverse(Loads.begin(), Loads.end());
10102 std::reverse(LoadChains.begin(), LoadChains.end());
10103 }
10105 SDValue Value =
10106 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10107 dl, VT, Loads);
10108 SDValue RetOps[] = {Value, TF};
10109 return DAG.getMergeValues(RetOps, dl);
10110}
10111
// Custom lowering for stores of the v256i1 / v512i1 types.
// The stored value is taken apart into 2 (pair) or 4 (accumulator) pieces,
// each piece is stored at a consecutive 16-byte offset, and the individual
// stores are joined with a token factor, which is the returned chain.
// NOTE(review): the listing skips source lines 10121, 10132, 10135 and 10140,
// so the early-exit condition, the declaration of Stores, the first statement
// of the v512i1 branch and the start of the Elt extraction are not visible.
10112SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10113 SelectionDAG &DAG) const {
10114 SDLoc dl(Op);
10115 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10116 SDValue StoreChain = SN->getChain();
10117 SDValue BasePtr = SN->getBasePtr();
10118 SDValue Value = SN->getValue();
10119 EVT StoreVT = Value.getValueType();
10120
10122 return Op;
10123
10124 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10125 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10126 // underlying registers individually.
10127 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10128 "Type unsupported without MMA");
10129 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10130 "Type unsupported without paired vector support");
10131 Align Alignment = SN->getAlign();
10133 unsigned NumVecs = 2;
10134 if (StoreVT == MVT::v512i1) {
10136 NumVecs = 4;
10137 }
10138 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
      // On little-endian the pieces are taken in reverse order, so memory
      // offset Idx * 16 receives piece NumVecs - 1 - Idx.
10139 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10141 DAG.getConstant(VecNum, dl, MVT::i64));
10142 SDValue Store =
10143 DAG.getStore(StoreChain, dl, Elt, BasePtr,
10144 SN->getPointerInfo().getWithOffset(Idx * 16),
10145 commonAlignment(Alignment, Idx * 16),
10146 SN->getMemOperand()->getFlags(), SN->getAAInfo());
      // Step the address to the next 16-byte piece.
10147 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10148 DAG.getConstant(16, dl, BasePtr.getValueType()));
10149 Stores.push_back(Store);
10150 }
10151 SDValue TF = DAG.getTokenFactor(dl, Stores);
10152 return TF;
10153}
10154
// Custom lowering for vector MUL; only v4i32 and v16i8 are handled, any other
// type reaches llvm_unreachable.
//  - v4i32: the result is assembled from AltiVec halfword operations:
//    vmulouh gives LoProd, vmsumuhm of LHS with the vrlw-rotated RHS gives
//    HiProd, which is shifted with vslw and added to LoProd.
//  - v16i8: vmuleub and vmuloub each produce v8i16 products, and a byte
//    shuffle picks one byte of every product to form the v16i8 result.
// NOTE(review): the listing skips source lines 10169, 10189 and 10194, so
// some statements between the visible ones (e.g. further bitcasts) may be
// missing here.
10155SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10156 SDLoc dl(Op);
10157 if (Op.getValueType() == MVT::v4i32) {
10158 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10159
10160 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10161 // +16 as shift amt.
10162 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10163 SDValue RHSSwap = // = vrlw RHS, 16
10164 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10165
10166 // Shrinkify inputs to v8i16.
10167 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10168 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10170
10171 // Low parts multiplied together, generating 32-bit results (we ignore the
10172 // top parts).
10173 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10174 LHS, RHS, DAG, dl, MVT::v4i32);
10175
      // Multiply-sum of LHS with the rotated RHS, accumulated onto Zero.
10176 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10177 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10178 // Shift the high parts up 16 bits.
10179 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10180 Neg16, DAG, dl);
10181 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10182 } else if (Op.getValueType() == MVT::v16i8) {
10183 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10184 bool isLittleEndian = Subtarget.isLittleEndian();
10185
10186 // Multiply the even 8-bit parts, producing 16-bit sums.
10187 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10188 LHS, RHS, DAG, dl, MVT::v8i16);
10190
10191 // Multiply the odd 8-bit parts, producing 16-bit sums.
10192 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10193 LHS, RHS, DAG, dl, MVT::v8i16);
10195
10196 // Merge the results together. Because vmuleub and vmuloub are
10197 // instructions with a big-endian bias, we must reverse the
10198 // element numbering and reverse the meaning of "odd" and "even"
10199 // when generating little endian code.
      // Shuffle mask: entries below 16 pick a byte from the first shuffle
      // input, entries of 16 and above pick from the second. Little-endian
      // uses bytes 2*i of each product, big-endian uses bytes 2*i+1.
10200 int Ops[16];
10201 for (unsigned i = 0; i != 8; ++i) {
10202 if (isLittleEndian) {
10203 Ops[i*2 ] = 2*i;
10204 Ops[i*2+1] = 2*i+16;
10205 } else {
10206 Ops[i*2 ] = 2*i+1;
10207 Ops[i*2+1] = 2*i+1+16;
10208 }
10209 }
      // The two shuffle inputs swap roles between the endiannesses.
10210 if (isLittleEndian)
10211 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10212 else
10213 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10214 } else {
10215 llvm_unreachable("Unknown mul to lower!");
10216 }
10217}
10218
// Custom lowering hook for FP_ROUND (strict and non-strict forms).
// Returns an empty SDValue when the value being rounded is f128 and the
// subtarget lacks P9 vector support; otherwise the node is returned unchanged.
10219SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
10220 bool IsStrict = Op->isStrictFPOpcode();
      // The source value is operand 1 for strict opcodes and operand 0
      // otherwise (presumably because strict nodes carry a chain first).
10221 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
10222 !Subtarget.hasP9Vector())
10223 return SDValue();
10224
10225 return Op;
10226}
10227
10228// Custom lowering for fpext v2f32 to v2f64
// Returns an empty SDValue unless the node extends v2f32 to v2f64 and its
// input is produced by one of the opcodes handled in the switch below:
//  - a two-operand node with a constant index taken from a v4f32 source
//    (the case label itself is not visible in this listing),
//  - FADD / FMUL / FSUB whose operands are all loads,
//  - a plain LOAD.
// The visible results are PPCISD::FP_EXTEND_HALF nodes.
// NOTE(review): the listing skips source lines 10246, 10248, 10267, 10280,
// 10282, 10283, 10289, 10293 and 10296, so several statements below are
// truncated. The llvm_unreachable message at the end also contains the typo
// "swtich"; it is a runtime string and is left untouched here.
10229SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10230
10231 assert(Op.getOpcode() == ISD::FP_EXTEND &&
10232 "Should only be called for ISD::FP_EXTEND");
10233
10234 // FIXME: handle extends from half precision float vectors on P9.
10235 // We only want to custom lower an extend from v2f32 to v2f64.
10236 if (Op.getValueType() != MVT::v2f64 ||
10237 Op.getOperand(0).getValueType() != MVT::v2f32)
10238 return SDValue();
10239
10240 SDLoc dl(Op);
10241 SDValue Op0 = Op.getOperand(0);
10242
      // Dispatch on the opcode of the node that produces the v2f32 input.
10243 switch (Op0.getOpcode()) {
10244 default:
10245 return SDValue();
10247 assert(Op0.getNumOperands() == 2 &&
10249 "Node should have 2 operands with second one being a constant!");
10250
10251 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10252 return SDValue();
10253
10254 // Custom lower is only done for high or low doubleword.
10255 int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10256 if (Idx % 2 != 0)
10257 return SDValue();
10258
10259 // Since input is v4f32, at this point Idx is either 0 or 2.
10260 // Shift to get the doubleword position we want.
10261 int DWord = Idx >> 1;
10262
10263 // High and low word positions are different on little endian.
10264 if (Subtarget.isLittleEndian())
10265 DWord ^= 0x1;
10266
10268 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10269 }
10270 case ISD::FADD:
10271 case ISD::FMUL:
10272 case ISD::FSUB: {
10273 SDValue NewLoad[2];
10274 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10275 // Ensure both input are loads.
10276 SDValue LdOp = Op0.getOperand(i);
10277 if (LdOp.getOpcode() != ISD::LOAD)
10278 return SDValue();
10279 // Generate new load node.
10281 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10284 LD->getMemoryVT(), LD->getMemOperand());
10285 }
      // Redo the arithmetic on v4f32 using the replacement loads, keeping the
      // original node's flags.
10286 SDValue NewOp =
10287 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10288 NewLoad[1], Op0.getNode()->getFlags());
10290 DAG.getConstant(0, dl, MVT::i32));
10291 }
10292 case ISD::LOAD: {
10294 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10295 SDValue NewLd = DAG.getMemIntrinsicNode(
10297 LD->getMemoryVT(), LD->getMemOperand());
10298 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10299 DAG.getConstant(0, dl, MVT::i32));
10300 }
10301 }
10302 llvm_unreachable("ERROR:Should return for all cases within swtich.");
10303}
10304
10305/// LowerOperation - Provide custom lowering hooks for some operations.
10306///
/// Dispatches on Op.getOpcode() to the matching Lower* helper and returns
/// whatever that helper produces. An opcode with no case here reaches
/// llvm_unreachable.
// NOTE(review): the listing skips source lines 10307 (the function signature),
// 10326, 10338, 10339, 10342, 10343, 10364, 10378 and 10382, so some case
// labels are missing in front of the return statements shown below.
10308 switch (Op.getOpcode()) {
10309 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10310 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10311 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10312 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10313 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10314 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
10315 case ISD::SETCC: return LowerSETCC(Op, DAG);
10316 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
10317 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
10318
10319 // Variable argument lowering.
10320 case ISD::VASTART: return LowerVASTART(Op, DAG);
10321 case ISD::VAARG: return LowerVAARG(Op, DAG);
10322 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
10323
10324 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
10325 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10327 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10328
10329 // Exception handling lowering.
10330 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
10331 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
10332 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
10333
10334 case ISD::LOAD: return LowerLOAD(Op, DAG);
10335 case ISD::STORE: return LowerSTORE(Op, DAG);
10336 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
10337 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10340 case ISD::FP_TO_UINT:
10341 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10344 case ISD::UINT_TO_FP:
10345 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10346 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
10347
10348 // Lower 64-bit shifts.
10349 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
10350 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
10351 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
10352
      // Both funnel-shift directions share one helper.
10353 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
10354 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
10355
10356 // Vector-related lowering.
10357 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
10358 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
10359 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10360 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
10361 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10362 case ISD::MUL: return LowerMUL(Op, DAG);
10363 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10365 case ISD::FP_ROUND:
10366 return LowerFP_ROUND(Op, DAG);
10367 case ISD::ROTL: return LowerROTL(Op, DAG);
10368
10369 // For counter-based loop handling.
      // No replacement is produced here: an empty SDValue is returned.
10370 case ISD::INTRINSIC_W_CHAIN: return SDValue();
10371
10372 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
10373
10374 // Frame & Return address.
10375 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10376 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10377
10379 return LowerINTRINSIC_VOID(Op, DAG);
10380 case ISD::BSWAP:
10381 return LowerBSWAP(Op, DAG);
10383 return LowerATOMIC_CMP_SWAP(Op, DAG);
10384 }
10385}
10386
// Custom type legalization: for the opcode of N, pushes the replacement
// value(s) onto Results, or leaves Results untouched when the node is not
// handled here. Opcodes without a case reach llvm_unreachable.
// NOTE(review): the listing skips source lines 10387-10388 (the start of the
// signature; presumably this is PPCTargetLowering::ReplaceNodeResults --
// confirm), as well as 10395, 10403, 10410, 10434, 10435 and 10440, so some
// case labels and declarations (VTs, SVT) are not visible here.
10389 SelectionDAG &DAG) const {
10390 SDLoc dl(N);
10391 switch (N->getOpcode()) {
10392 default:
10393 llvm_unreachable("Do not know how to custom type legalize this operation!");
10394 case ISD::READCYCLECOUNTER: {
10396 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
10397
      // Results 0 and 1 of RTB are paired into one i64; result 2 is passed on
      // separately (presumably the chain).
10398 Results.push_back(
10399 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
10400 Results.push_back(RTB.getValue(2));
10401 break;
10402 }
      // Only the loop_decrement intrinsic is rewritten in this case.
10404 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
10405 Intrinsic::loop_decrement)
10406 break;
10407
10408 assert(N->getValueType(0) == MVT::i1 &&
10409 "Unexpected result type for CTR decrement intrinsic");
10411 N->getValueType(0));
10412 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
      // Re-create the node with result type SVT, then truncate back to i1.
10413 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
10414 N->getOperand(1));
10415
10416 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
10417 Results.push_back(NewInt.getValue(1));
10418 break;
10419 }
10420 case ISD::VAARG: {
      // Only handled for SVR4 ABI subtargets that are not PPC64.
10421 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
10422 return;
10423
10424 EVT VT = N->getValueType(0);
10425
10426 if (VT == MVT::i64) {
10427 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
10428
10429 Results.push_back(NewNode);
10430 Results.push_back(NewNode.getValue(1));
10431 }
10432 return;
10433 }
10436 case ISD::FP_TO_SINT:
10437 case ISD::FP_TO_UINT:
10438 // LowerFP_TO_INT() can only handle f32 and f64.
10439 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
10441 return;
10442 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
10443 return;
10444 case ISD::TRUNCATE: {
      // Only vector truncates are custom-legalized here.
10445 if (!N->getValueType(0).isVector())
10446 return;
10447 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
10448 if (Lowered)
10449 Results.push_back(Lowered);
10450 return;
10451 }
10452 case ISD::FSHL:
10453 case ISD::FSHR:
10454 // Don't handle funnel shifts here.
10455 return;
10456 case ISD::BITCAST:
10457 // Don't handle bitcast here.
10458 return;
10459 case ISD::FP_EXTEND:
      // Forward to LowerFP_EXTEND and keep the result only if it is non-empty.
10460 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
10461 if (Lowered)
10462 Results.push_back(Lowered);
10463 return;
10464 }
10465}
10466
10467//===----------------------------------------------------------------------===//
10468// Other Lowering Code
10469//===----------------------------------------------------------------------===//
10470
// Emits a call, with no arguments, to the intrinsic identified by Id at the
// builder's current insertion point and returns the created call.
// NOTE(review): the listing skips source line 10471 (the function signature);
// the name callIntrinsic is taken from the call sites in the fence helpers.
      // The module is reached through the insert block's parent function.
10472 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10473 Function *Func = Intrinsic::getDeclaration(M, Id);
10474 return Builder.CreateCall(Func, {});
10475}
10476
10477// The mappings for emitLeading/TrailingFence is taken from
10478// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
// Leading fence: emits a ppc_sync call under a condition that is not visible
// in this listing, a ppc_lwsync call when Ord is release or stronger, and
// returns nullptr (no fence) otherwise.
// NOTE(review): the listing skips source lines 10479 (start of the signature)
// and 10482 (the condition guarding the ppc_sync call).
10480 Instruction *Inst,
10481 AtomicOrdering Ord) const {
10483 return callIntrinsic(Builder, Intrinsic::ppc_sync);
10484 if (isReleaseOrStronger(Ord))
10485 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10486 return nullptr;
10487}
10488
// Trailing fence: a fence is emitted only when Inst has an atomic load and
// Ord is acquire or stronger. A LoadInst on PPC64 gets a call to the
// ppc_cfence intrinsic, overloaded on the load's type, with the load as its
// argument; every other qualifying instruction gets ppc_lwsync. Otherwise
// nullptr is returned.
// NOTE(review): the listing skips source lines 10489 (start of the signature;
// presumably emitTrailingFence -- confirm) and 10498 (the start of the
// declaration lookup passed to CreateCall).
10490 Instruction *Inst,
10491 AtomicOrdering Ord) const {
10492 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
10493 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10494 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10495 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10496 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
10497 return Builder.CreateCall(
10499 Builder.GetInsertBlock()->getParent()->getParent(),
10500 Intrinsic::ppc_cfence, {Inst->getType()}),
10501 {Inst});
10502 // FIXME: Can use isync for rmw operation.
10503 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10504 }
10505 return nullptr;
10506}
10507
// Expands an atomic read-modify-write pseudo instruction into a
// load-reserve / store-conditional retry loop and returns the block that
// follows the loop (exitMBB).
//  - AtomicSize (1, 2, 4 or 8) picks the l*arx / st*cx opcode pair.
//  - BinOpcode is the operation applied to the loaded value; 0 means a plain
//    swap, in which case the incoming operand itself is stored.
//  - CmpOpcode / CmpPred, when CmpOpcode is non-zero, add a compare and an
//    early branch to exitMBB (the min/max forms shown in the comments below).
// NOTE(review): the listing skips source lines 10508-10509 (start of the
// signature; the name EmitAtomicBinary is taken from the visible call in the
// part-word emitter), 10544, 10553, 10593, 10603, 10606, 10610 and 10617, so
// several BuildMI operand lists and declarations (It, loop2MBB) are truncated.
10510 unsigned AtomicSize,
10511 unsigned BinOpcode,
10512 unsigned CmpOpcode,
10513 unsigned CmpPred) const {
10514 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10515 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10516
10517 auto LoadMnemonic = PPC::LDARX;
10518 auto StoreMnemonic = PPC::STDCX;
10519 switch (AtomicSize) {
10520 default:
10521 llvm_unreachable("Unexpected size of atomic entity");
10522 case 1:
10523 LoadMnemonic = PPC::LBARX;
10524 StoreMnemonic = PPC::STBCX;
      // NOTE(review): the assert checks for part-word atomics support; the
      // message text reads as if it described the opposite situation.
10525 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10526 break;
10527 case 2:
10528 LoadMnemonic = PPC::LHARX;
10529 StoreMnemonic = PPC::STHCX;
10530 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10531 break;
10532 case 4:
10533 LoadMnemonic = PPC::LWARX;
10534 StoreMnemonic = PPC::STWCX;
10535 break;
10536 case 8:
10537 LoadMnemonic = PPC::LDARX;
10538 StoreMnemonic = PPC::STDCX;
10539 break;
10540 }
10541
10542 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10543 MachineFunction *F = BB->getParent();
10545
      // Operand layout of the pseudo: result, two address registers, operand.
10546 Register dest = MI.getOperand(0).getReg();
10547 Register ptrA = MI.getOperand(1).getReg();
10548 Register ptrB = MI.getOperand(2).getReg();
10549 Register incr = MI.getOperand(3).getReg();
10550 DebugLoc dl = MI.getDebugLoc();
10551
10552 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10554 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10555 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10556 F->insert(It, loopMBB);
10557 if (CmpOpcode)
10558 F->insert(It, loop2MBB);
10559 F->insert(It, exitMBB);
      // Everything after MI, together with BB's successor edges, moves to
      // exitMBB.
10560 exitMBB->splice(exitMBB->begin(), BB,
10561 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10562 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10563
10564 MachineRegisterInfo &RegInfo = F->getRegInfo();
      // For a swap (BinOpcode == 0) the value to store is incr itself;
      // otherwise a fresh register of the matching width holds the result.
10565 Register TmpReg = (!BinOpcode) ? incr :
10566 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
10567 : &PPC::GPRCRegClass);
10568
10569 // thisMBB:
10570 // ...
10571 // fallthrough --> loopMBB
10572 BB->addSuccessor(loopMBB);
10573
10574 // loopMBB:
10575 // l[wd]arx dest, ptr
10576 // add r0, dest, incr
10577 // st[wd]cx. r0, ptr
10578 // bne- loopMBB
10579 // fallthrough --> exitMBB
10580
10581 // For max/min...
10582 // loopMBB:
10583 // l[wd]arx dest, ptr
10584 // cmpl?[wd] incr, dest
10585 // bgt exitMBB
10586 // loop2MBB:
10587 // st[wd]cx. dest, ptr
10588 // bne- loopMBB
10589 // fallthrough --> exitMBB
10590
10591 BB = loopMBB;
10592 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
10594 if (BinOpcode)
10595 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
10596 if (CmpOpcode) {
10597 // Signed comparisons of byte or halfword values must be sign-extended.
10598 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
10599 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10600 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
10601 ExtReg).addReg(dest);
10602 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10604 } else
10605 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10607
      // Leave the loop early when the predicate holds on CR0.
10608 BuildMI(BB, dl, TII->get(PPC::BCC))
10609 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
10611 BB->addSuccessor(exitMBB);
10612 BB = loop2MBB;
10613 }
10614 BuildMI(BB, dl, TII->get(StoreMnemonic))
10615 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
10616 BuildMI(BB, dl, TII->get(PPC::BCC))
10618 BB->addSuccessor(loopMBB);
10619 BB->addSuccessor(exitMBB);
10620
10621 // exitMBB:
10622 // ...
10623 BB = exitMBB;
10624 return BB;
10625}
10626
// Classifies MI by opcode: returns true for the load, extend and shift
// opcodes listed below (presumably all instructions whose result is already
// sign-extended), defers to TII->isSignExtended for COPY, and returns false
// for everything else.
// NOTE(review): the listing skips source line 10627 (the function signature);
// the name isSignExtended is taken from the visible call in the part-word
// atomic emitter.
10628 switch(MI.getOpcode()) {
10629 default:
10630 return false;
10631 case PPC::COPY:
10632 return TII->isSignExtended(MI);
10633 case PPC::LHA:
10634 case PPC::LHA8:
10635 case PPC::LHAU:
10636 case PPC::LHAU8:
10637 case PPC::LHAUX:
10638 case PPC::LHAUX8:
10639 case PPC::LHAX:
10640 case PPC::LHAX8:
10641 case PPC::LWA:
10642 case PPC::LWAUX:
10643 case PPC::LWAX:
10644 case PPC::LWAX_32:
10645 case PPC::LWA_32:
10646 case PPC::PLHA:
10647 case PPC::PLHA8:
10648 case PPC::PLHA8pc:
10649 case PPC::PLHApc:
10650 case PPC::PLWA:
10651 case PPC::PLWA8:
10652 case PPC::PLWA8pc:
10653 case PPC::PLWApc:
10654 case PPC::EXTSB:
10655 case PPC::EXTSB8:
10656 case PPC::EXTSB8_32_64:
10657 case PPC::EXTSB8_rec:
10658 case PPC::EXTSB_rec:
10659 case PPC::EXTSH:
10660 case PPC::EXTSH8:
10661 case PPC::EXTSH8_32_64:
10662 case PPC::EXTSH8_rec:
10663 case PPC::EXTSH_rec:
10664 case PPC::EXTSW:
10665 case PPC::EXTSWSLI:
10666 case PPC::EXTSWSLI_32_64:
10667 case PPC::EXTSWSLI_32_64_rec:
10668 case PPC::EXTSWSLI_rec:
10669 case PPC::EXTSW_32:
10670 case PPC::EXTSW_32_64:
10671 case PPC::EXTSW_32_64_rec:
10672 case PPC::EXTSW_rec:
10673 case PPC::SRAW:
10674 case PPC::SRAWI:
10675 case PPC::SRAWI_rec:
10676 case PPC::SRAW_rec:
10677 return true;
10678 }
      // Not reached: every path through the switch above returns.
10679 return false;
10680}
10681
// Expands an 8-bit (is8bit) or 16-bit atomic read-modify-write pseudo.
// When the subtarget has part-word atomics the work is forwarded to
// EmitAtomicBinary with size 1 or 2. Otherwise the operation is emulated on
// the enclosing aligned word: the byte/halfword position is turned into a
// shift amount and a mask, a lwarx/stwcx. loop updates only the masked bits,
// and the old value is shifted back down into dest in the exit block.
// Returns the exit block.
// NOTE(review): the listing skips source lines 10682-10683 (start of the
// signature; presumably EmitPartwordAtomicBinary -- confirm), 10695, 10718,
// 10725, 10752, 10797, 10816, 10820, 10829-10830, 10832, 10840, 10852, 10862,
// 10872 and 10882, so several BuildMI operand lists and declarations (It,
// loop2MBB, Ptr1Reg, IsSignExtended) are truncated.
10684 bool is8bit, // operation
10685 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
10686 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10687 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
10688
10689 // If this is a signed comparison and the value being compared is not known
10690 // to be sign extended, sign extend it here.
10691 DebugLoc dl = MI.getDebugLoc();
10692 MachineFunction *F = BB->getParent();
10693 MachineRegisterInfo &RegInfo = F->getRegInfo();
10694 Register incr = MI.getOperand(3).getReg();
10696 isSignExtended(*RegInfo.getVRegDef(incr), TII);
10697
10698 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
      // Insert the extend before MI and make MI use the extended register.
10699 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10700 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
10701 .addReg(MI.getOperand(3).getReg());
10702 MI.getOperand(3).setReg(ValueReg);
10703 }
10704 // If we support part-word atomic mnemonics, just use them
10705 if (Subtarget.hasPartwordAtomics())
10706 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
10707 CmpPred);
10708
10709 // In 64 bit mode we have to use 64 bits for addresses, even though the
10710 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
10711 // registers without caring whether they're 32 or 64, but here we're
10712 // doing actual arithmetic on the addresses.
10713 bool is64bit = Subtarget.isPPC64();
10714 bool isLittleEndian = Subtarget.isLittleEndian();
10715 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10716
10717 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10719
10720 Register dest = MI.getOperand(0).getReg();
10721 Register ptrA = MI.getOperand(1).getReg();
10722 Register ptrB = MI.getOperand(2).getReg();
10723
10724 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10726 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10727 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10728 F->insert(It, loopMBB);
10729 if (CmpOpcode)
10730 F->insert(It, loop2MBB);
10731 F->insert(It, exitMBB);
      // Everything after MI, together with BB's successor edges, moves to
      // exitMBB.
10732 exitMBB->splice(exitMBB->begin(), BB,
10733 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10734 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10735
      // RC is the pointer-width class; GPRC is used for all 32-bit temporaries.
10736 const TargetRegisterClass *RC =
10737 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10738 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
10739
10740 Register PtrReg = RegInfo.createVirtualRegister(RC);
10741 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
      // On little-endian the raw shift is used directly; big-endian gets a
      // separate register that the XORI below fills in.
10742 Register ShiftReg =
10743 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
10744 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
10745 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
10746 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
10747 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
10748 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
10749 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
10750 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
10751 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
      // For a swap (BinOpcode == 0) the shifted operand is stored as-is.
10753 Register TmpReg =
10754 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
10755
10756 // thisMBB:
10757 // ...
10758 // fallthrough --> loopMBB
10759 BB->addSuccessor(loopMBB);
10760
10761 // The 4-byte load must be aligned, while a char or short may be
10762 // anywhere in the word. Hence all this nasty bookkeeping code.
10763 // add ptr1, ptrA, ptrB [copy if ptrA==0]
10764 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10765 // xori shift, shift1, 24 [16]
10766 // rlwinm ptr, ptr1, 0, 0, 29
10767 // slw incr2, incr, shift
10768 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10769 // slw mask, mask2, shift
10770 // loopMBB:
10771 // lwarx tmpDest, ptr
10772 // add tmp, tmpDest, incr2
10773 // andc tmp2, tmpDest, mask
10774 // and tmp3, tmp, mask
10775 // or tmp4, tmp3, tmp2
10776 // stwcx. tmp4, ptr
10777 // bne- loopMBB
10778 // fallthrough --> exitMBB
10779 // srw dest, tmpDest, shift
10780 if (ptrA != ZeroReg) {
10781 Ptr1Reg = RegInfo.createVirtualRegister(RC);
10782 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10783 .addReg(ptrA)
10784 .addReg(ptrB);
10785 } else {
10786 Ptr1Reg = ptrB;
10787 }
10788 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
10789 // mode.
10790 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
10791 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
10792 .addImm(3)
10793 .addImm(27)
10794 .addImm(is8bit ? 28 : 27);
10795 if (!isLittleEndian)
10796 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
10798 .addImm(is8bit ? 24 : 16);
      // PtrReg: the address with its low two bits cleared (word aligned).
10799 if (is64bit)
10800 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10801 .addReg(Ptr1Reg)
10802 .addImm(0)
10803 .addImm(61);
10804 else
10805 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10806 .addReg(Ptr1Reg)
10807 .addImm(0)
10808 .addImm(0)
10809 .addImm(29);
10810 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
      // Build the unshifted mask: 255 for bytes, 65535 for halfwords.
10811 if (is8bit)
10812 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10813 else {
10814 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10815 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10817 .addImm(65535);
10818 }
10819 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10821 .addReg(ShiftReg);
10822
10823 BB = loopMBB;
10824 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10825 .addReg(ZeroReg)
10826 .addReg(PtrReg);
10827 if (BinOpcode)
10828 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
10831 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
10833 .addReg(MaskReg);
10834 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
10835 if (CmpOpcode) {
10836 // For unsigned comparisons, we can directly compare the shifted values.
10837 // For signed comparisons we shift and sign extend.
10838 Register SReg = RegInfo.createVirtualRegister(GPRC);
10839 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
10841 .addReg(MaskReg);
10842 unsigned ValueReg = SReg;
10843 unsigned CmpReg = Incr2Reg;
10844 if (CmpOpcode == PPC::CMPW) {
10845 ValueReg = RegInfo.createVirtualRegister(GPRC);
10846 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
10847 .addReg(SReg)
10848 .addReg(ShiftReg);
10849 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
10850 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
10851 .addReg(ValueReg);
10853 CmpReg = incr;
10854 }
10855 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10856 .addReg(CmpReg)
10857 .addReg(ValueReg);
      // Leave the loop early when the predicate holds on CR0.
10858 BuildMI(BB, dl, TII->get(PPC::BCC))
10859 .addImm(CmpPred)
10860 .addReg(PPC::CR0)
10861 .addMBB(exitMBB);
10863 BB->addSuccessor(exitMBB);
10864 BB = loop2MBB;
10865 }
      // Merge the updated field (Tmp3Reg) with the untouched bits (Tmp2Reg).
10866 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
10867 BuildMI(BB, dl, TII->get(PPC::STWCX))
10868 .addReg(Tmp4Reg)
10869 .addReg(ZeroReg)
10870 .addReg(PtrReg);
10871 BuildMI(BB, dl, TII->get(PPC::BCC))
10873 .addReg(PPC::CR0)
10874 .addMBB(loopMBB);
10875 BB->addSuccessor(loopMBB);
10876 BB->addSuccessor(exitMBB);
10877
10878 // exitMBB:
10879 // ...
10880 BB = exitMBB;
      // The shift back into dest is inserted at the very top of exitMBB.
10881 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
10883 .addReg(ShiftReg);
10884 return BB;
10885}
10886
// Body of the setjmp expansion; presumably PPCTargetLowering::emitEHSjLjSetJmp,
// which the custom inserter in this file calls for EH_SjLj_SetJmp32/64.
// NOTE(review): the embedded listing numbers skip here (10887-10888 for the
// start of the signature; also 10895, 10898, 10906, 10925-10927, 10931, 10955
// and several operand lines further down), so the declarations of MRI, I, PVT,
// thisMBB/mainMBB/sinkMBB, MIB and PtrRC are not visible in this extraction --
// confirm against the upstream file before relying on this text.
//
// MI operand 0 is the result register and operand 1 the register holding the
// buffer address. The code splits the block at MI, emits the control flow
// sketched in the comment below, erases MI and returns sinkMBB.
10889 MachineBasicBlock *MBB) const {
10890 DebugLoc DL = MI.getDebugLoc();
10891 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10892 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
10893
10894 MachineFunction *MF = MBB->getParent();
10896
10897 const BasicBlock *BB = MBB->getBasicBlock();
10899
// Two fresh registers of DstReg's class hold the result on the two paths
// (0 written in mainMBB, 1 written on the restore path in thisMBB).
10900 Register DstReg = MI.getOperand(0).getReg();
10901 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
10902 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
10903 Register mainDstReg = MRI.createVirtualRegister(RC);
10904 Register restoreDstReg = MRI.createVirtualRegister(RC);
10905
10907 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10908 "Invalid Pointer Size!");
10909 // For v = setjmp(buf), we generate
10910 //
10911 // thisMBB:
10912 // SjLjSetup mainMBB
10913 // bl mainMBB
10914 // v_restore = 1
10915 // b sinkMBB
10916 //
10917 // mainMBB:
10918 // buf[LabelOffset] = LR
10919 // v_main = 0
10920 //
10921 // sinkMBB:
10922 // v = phi(main, restore)
10923 //
10924
// Both new blocks are inserted into the function at iterator I.
10928 MF->insert(I, mainMBB);
10929 MF->insert(I, sinkMBB);
10930
10932
10933 // Transfer the remainder of BB and its successor edges to sinkMBB.
10934 sinkMBB->splice(sinkMBB->begin(), MBB,
10935 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
10936 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
10937
10938 // Note that the structure of the jmp_buf used here is not compatible
10939 // with that used by libc, and is not designed to be. Specifically, it
10940 // stores only those 'reserved' registers that LLVM does not otherwise
10941 // understand how to spill. Also, by convention, by the time this
10942 // intrinsic is called, Clang has already stored the frame address in the
10943 // first slot of the buffer and stack address in the third. Following the
10944 // X86 target code, we'll store the jump address in the second slot. We also
10945 // need to save the TOC pointer (R2) to handle jumps between shared
10946 // libraries, and that will be stored in the fourth slot. The thread
10947 // identifier (R13) is not affected.
10948
10949 // thisMBB:
// Buffer slot offsets, expressed as multiples of the pointer store size.
10950 const int64_t LabelOffset = 1 * PVT.getStoreSize();
10951 const int64_t TOCOffset = 3 * PVT.getStoreSize();
10952 const int64_t BPOffset = 4 * PVT.getStoreSize();
10953
10954 // Prepare a virtual register for the IP; it receives LR in mainMBB.
10956 Register LabelReg = MRI.createVirtualRegister(PtrRC);
10957 Register BufReg = MI.getOperand(1).getReg();
10958
// On the 64-bit ELF ABI, emit an STD of X2 (the TOC pointer) addressed off
// BufReg. NOTE(review): the offset operand line (10963) is missing from this
// listing; presumably TOCOffset.
10959 if (Subtarget.is64BitELFABI()) {
10961 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
10962 .addReg(PPC::X2)
10964 .addReg(BufReg)
10965 .cloneMemRefs(MI);
10966 }
10967
10968 // Naked functions never have a base pointer, and so we use r1. For all
10969 // other functions, this decision must be delayed until during PEI.
10970 unsigned BaseReg;
10971 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
10972 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
10973 else
10974 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
10975
// Store BaseReg into the buffer (STD on PPC64, STW otherwise).
// NOTE(review): the offset operand line (10979) is missing; presumably
// BPOffset.
10976 MIB = BuildMI(*thisMBB, MI, DL,
10977 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
10978 .addReg(BaseReg)
10980 .addReg(BufReg)
10981 .cloneMemRefs(MI);
10982
10983 // Setup
// Branch-and-link to mainMBB; the instruction is given the register mask
// returned by getNoPreservedMask().
10984 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
10985 MIB.addRegMask(TRI->getNoPreservedMask());
10986
// Result value on the restore path (v_restore = 1 in the sketch above).
10987 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
10988
10989 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
10990 .addMBB(mainMBB);
10991 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
10992
// The edge to mainMBB is given probability zero and the edge to sinkMBB
// probability one.
10993 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
10994 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
10995
10996 // mainMBB:
10997 // mainDstReg = 0
// Copy LR into LabelReg (MFLR8 on PPC64, MFLR otherwise).
// NOTE(review): the line opening this BuildMI call (10999) is missing.
10998 MIB =
11000 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11001
11002 // Store IP
// NOTE(review): operand lines 11005-11006 and 11010-11011 are missing; per
// the sketch above this stores the label at buf[LabelOffset].
11003 if (Subtarget.isPPC64()) {
11004 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11007 .addReg(BufReg);
11008 } else {
11009 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11012 .addReg(BufReg);
11013 }
11014 MIB.cloneMemRefs(MI);
11015
11016 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11017 mainMBB->addSuccessor(sinkMBB);
11018
11019 // sinkMBB:
// A PHI defining DstReg is placed at the start of sinkMBB.
// NOTE(review): the PHI's incoming operand lines (11022-11023) are missing
// from this listing; per the sketch above it merges the main and restore
// values.
11020 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11021 TII->get(PPC::PHI), DstReg)
11024
// The pseudo has been fully expanded; remove it and continue in sinkMBB.
11025 MI.eraseFromParent();
11026 return sinkMBB;
11027}
11028
// Body of the longjmp expansion; presumably
// PPCTargetLowering::emitEHSjLjLongJmp, which the custom inserter in this file
// calls for EH_SjLj_LongJmp32/64.
// NOTE(review): listing lines 11029-11030 (start of the signature), 11036,
// 11038 and 11054 are absent, so the declarations of MRI, PVT and MIB are not
// visible here -- confirm against the upstream file.
//
// MI operand 0 is the register holding the buffer address. Every instruction
// is inserted before MI in MBB: reloads of FP, the jump target, SP, BP and
// (64-bit SVR4 only) X2 from the buffer, followed by MTCTR/BCTR through Tmp.
// MI is then erased and MBB is returned; no new blocks are created.
11031 MachineBasicBlock *MBB) const {
11032 DebugLoc DL = MI.getDebugLoc();
11033 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11034
11035 MachineFunction *MF = MBB->getParent();
11037
11039 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11040 "Invalid Pointer Size!");
11041
// Tmp holds the reloaded jump target; its class follows the pointer width.
11042 const TargetRegisterClass *RC =
11043 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11044 Register Tmp = MRI.createVirtualRegister(RC);
11045 // Since FP is only updated here but NOT referenced, it's treated as GPR.
11046 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11047 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
// BP is X30 for 64-bit pointers; for 32-bit it is R29 when the subtarget uses
// the SVR4 ABI and the code is position independent, and R30 otherwise.
11048 unsigned BP =
11049 (PVT == MVT::i64)
11050 ? PPC::X30
11051 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11052 : PPC::R30);
11053
11055
// Buffer slot offsets, expressed as multiples of the pointer store size.
11056 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11057 const int64_t SPOffset = 2 * PVT.getStoreSize();
11058 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11059 const int64_t BPOffset = 4 * PVT.getStoreSize();
11060
11061 Register BufReg = MI.getOperand(0).getReg();
11062
11063 // Reload FP (the jumped-to function may not have had a
11064 // frame pointer, and if so, then its r31 will be restored
11065 // as necessary).
11066 if (PVT == MVT::i64) {
11067 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11068 .addImm(0)
11069 .addReg(BufReg);
11070 } else {
11071 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11072 .addImm(0)
11073 .addReg(BufReg);
11074 }
11075 MIB.cloneMemRefs(MI);
11076
11077 // Reload IP
// NOTE(review): the offset operand lines (11080 / 11084) are missing from
// this listing; presumably LabelOffset.
11078 if (PVT == MVT::i64) {
11079 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11081 .addReg(BufReg);
11082 } else {
11083 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11085 .addReg(BufReg);
11086 }
11087 MIB.cloneMemRefs(MI);
11088
11089 // Reload SP
11090 if (PVT == MVT::i64) {
11091 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11092 .addImm(SPOffset)
11093 .addReg(BufReg);
11094 } else {
11095 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11096 .addImm(SPOffset)
11097 .addReg(BufReg);
11098 }
11099 MIB.cloneMemRefs(MI);
11100
11101 // Reload BP
// NOTE(review): the offset operand lines (11104 / 11108) are missing from
// this listing; presumably BPOffset.
11102 if (PVT == MVT::i64) {
11103 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11105 .addReg(BufReg);
11106 } else {
11107 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11109 .addReg(BufReg);
11110 }
11111 MIB.cloneMemRefs(MI);
11112
11113 // Reload TOC
// Only done for 64-bit pointers on the SVR4 ABI; the value is loaded into X2.
// NOTE(review): lines 11115 and 11117 (the latter presumably the TOCOffset
// operand) are missing from this listing.
11114 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11116 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11118 .addReg(BufReg)
11119 .cloneMemRefs(MI);
11120 }
11121
11122 // Jump
// Move the reloaded target from Tmp into CTR, then branch through CTR.
11123 BuildMI(*MBB, MI, DL,
11124 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11125 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11126
11127 MI.eraseFromParent();
11128 return MBB;
11129}
11130
// Reports whether inline stack probes were requested for the function: true
// only when MF's function carries a "probe-stack" attribute whose string value
// is exactly "inline-asm", false otherwise (including when the attribute is
// absent).
// NOTE(review): the function header (listing line 11131) is missing from this
// extraction; presumably PPCTargetLowering::hasInlineStackProbe taking a
// MachineFunction named MF -- confirm against the upstream file.
11132 // If the function specifically requests inline stack probes, emit them.
11133 if (MF.getFunction().hasFnAttribute("probe-stack"))
11134 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11135 "inline-asm";
11136 return false;
11137}
11138
// Computes the stack probe interval for MF's function: 4096 by default,
// optionally overridden by the "stack-probe-size" function attribute, then
// rounded down to a multiple of the stack alignment. A result of zero is
// replaced by the stack alignment, so the returned value is never zero.
// NOTE(review): the function header (listing line 11139) is missing from this
// extraction; the name is presumably getStackProbeSize, as called from the
// probed-alloca expansion later in this file -- confirm upstream.
11140 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11141 unsigned StackAlign = TFI->getStackAlignment();
// The mask arithmetic below relies on StackAlign being a power of two.
11142 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11143 "Unexpected stack alignment");
11144 // The default stack probe size is 4096 if the function has no
11145 // stack-probe-size attribute.
11146 unsigned StackProbeSize = 4096;
11147 const Function &Fn = MF.getFunction();
// NOTE(review): listing line 11150 is missing between the two lines of this
// call chain; presumably it converts the attribute to its string value before
// getAsInteger parses it into StackProbeSize. The result of getAsInteger is
// not checked here.
11148 if (Fn.hasFnAttribute("stack-probe-size"))
11149 Fn.getFnAttribute("stack-probe-size")
11151 .getAsInteger(0, StackProbeSize);
11152 // Round down to the stack alignment.
11153 StackProbeSize &= ~(StackAlign - 1);
// A requested size smaller than the alignment rounds to zero; fall back to
// the alignment in that case.
11154 return StackProbeSize ? StackProbeSize : StackAlign;
11155}
11156
11157// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
11158// into three phases. In the first phase, it uses pseudo instruction
11159// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
11160// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
11161// Finally, it uses pseudo instruction DYNAREAOFFSET to get the future result of
11162// MaxCallFrameSize so that it can calculate correct data area pointer.
//
// MI operand 0 is the destination register and operand 1 the register holding
// the negated allocation size; operands 2 and 3 are forwarded unchanged to the
// PREPARE_PROBED_ALLOCA and DYNAREAOFFSET pseudos. Returns TailMBB, which
// receives the instructions that followed MI.
// NOTE(review): the embedded listing numbers skip in many places in this
// function (11163-11164 for the start of the signature, 11171-11172,
// 11193-11195, 11197, and numerous BuildMI operand lines), so the declarations
// of MRI, TestMBB/BlockMBB/TailMBB and MBBIter and several operands are not
// visible in this extraction -- confirm against the upstream file.
11165 MachineBasicBlock *MBB) const {
11166 const bool isPPC64 = Subtarget.isPPC64();
11167 MachineFunction *MF = MBB->getParent();
11168 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11169 DebugLoc DL = MI.getDebugLoc();
11170 const unsigned ProbeSize = getStackProbeSize(*MF);
11173 // The CFG of probing stack looks as
11174 // +-----+
11175 // | MBB |
11176 // +--+--+
11177 // |
11178 // +----v----+
11179 // +--->+ TestMBB +---+
11180 // | +----+----+ |
11181 // | | |
11182 // | +-----v----+ |
11183 // +---+ BlockMBB | |
11184 // +----------+ |
11185 // |
11186 // +---------+ |
11187 // | TailMBB +<--+
11188 // +---------+
11189 // In MBB, calculate previous frame pointer and final stack pointer.
11190 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
11191 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11192 // TailMBB is spliced via \p MI.
11196
// The three new blocks are inserted into the function at MBBIter, in the
// order TestMBB, BlockMBB, TailMBB.
11198 MF->insert(MBBIter, TestMBB);
11199 MF->insert(MBBIter, BlockMBB);
11200 MF->insert(MBBIter, TailMBB);
11201
11202 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11203 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11204
11205 Register DstReg = MI.getOperand(0).getReg();
11206 Register NegSizeReg = MI.getOperand(1).getReg();
11207 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11208 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11209 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11210 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11211
11212 // Since value of NegSizeReg might be realigned in prologepilog, insert a
11213 // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
11214 // NegSize.
11215 unsigned ProbeOpc;
11216 if (!MRI.hasOneNonDBGUse(NegSizeReg))
11217 ProbeOpc =
11218 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11219 else
11220 // By using PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
11221 // and NegSizeReg will be allocated in the same physical register to avoid
11222 // a redundant copy when NegSizeReg has only one use, which is the current
11223 // MI and will be replaced by PREPARE_PROBED_ALLOCA then.
11224 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11225 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
// NOTE(review): operand lines 11227-11228 are missing from this listing.
11226 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11229 .add(MI.getOperand(2))
11230 .add(MI.getOperand(3));
11231
11232 // Calculate final stack pointer, which equals to SP + ActualNegSize.
// NOTE(review): lines 11234 and 11236 (destination and second addend) are
// missing from this listing.
11233 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11235 .addReg(SPReg)
11237
11238 // Materialize a scratch register for update.
// ScratchReg is loaded with -ProbeSize: LIS (upper 16 bits) followed by ORI
// (lower 16 bits) when the value does not fit a signed 16-bit immediate,
// otherwise a single LI.
11239 int64_t NegProbeSize = -(int64_t)ProbeSize;
11240 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11241 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11242 if (!isInt<16>(NegProbeSize)) {
11243 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11244 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11245 .addImm(NegProbeSize >> 16);
11246 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11247 ScratchReg)
11248 .addReg(TempReg)
11249 .addImm(NegProbeSize & 0xFFFF);
11250 } else
// NOTE(review): the immediate operand line (11252) is missing; presumably
// NegProbeSize.
11251 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11253
11254 {
11255 // Probing leading residual part.
// Emits a divide, a multiply and a subtract to form NegMod, then a
// store-with-update that stores FramePointer and moves SP by NegMod.
// NOTE(review): the source operands of the divide, multiply and subtract
// (lines 11258-11259, 11263, 11267) are missing; presumably this computes
// the remainder of the negated size with respect to the negated probe size.
11256 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11257 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11260 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11261 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11262 .addReg(Div)
11264 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11265 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11266 .addReg(Mul)
11268 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11269 .addReg(FramePointer)
11270 .addReg(SPReg)
11271 .addReg(NegMod);
11272 }
11273
11274 {
11275 // Remaining part should be multiple of ProbeSize.
// TestMBB compares SP against a second operand and conditionally branches to
// TailMBB; its successors are BlockMBB and TailMBB.
// NOTE(review): the second compare operand (11279) and the branch's
// predicate/condition operands (11281-11282) are missing; per the CFG comment
// above, the test is SP == final stack pointer.
11276 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11277 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11278 .addReg(SPReg)
11280 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11283 .addMBB(TailMBB);
11284 TestMBB->addSuccessor(BlockMBB);
11285 TestMBB->addSuccessor(TailMBB);
11286 }
11287
11288 {
11289 // Touch the block.
11290 // |P...|P...|P...
// Store-with-update of FramePointer, then branch back to TestMBB.
// NOTE(review): the index operand line (11294) is missing; presumably
// ScratchReg, i.e. SP moves by -ProbeSize per iteration.
11291 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11292 .addReg(FramePointer)
11293 .addReg(SPReg)
11295 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11296 BlockMBB->addSuccessor(TestMBB);
11297 }
11298
11299 // Calculation of MaxCallFrameSize is deferred to prologepilog, use
11300 // DYNAREAOFFSET pseudo instruction to get the future result.
// NOTE(review): lines 11301, 11303, 11305 and 11310 are missing, so the name
// of the register defined here and the second addend of the final ADD are not
// visible; presumably DstReg = SP + that offset register.
11302 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11304 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11306 .add(MI.getOperand(2))
11307 .add(MI.getOperand(3));
11308 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11309 .addReg(SPReg)
11311
11312 // Splice instructions after MI to TailMBB.
11313 TailMBB->splice(TailMBB->end(), MBB,
11314 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11315 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11317
11318 // Delete the pseudo instruction.
11319 MI.eraseFromParent();
11320
11322 return TailMBB;
11323}
11324
11327 MachineBasicBlock *BB) const {
11328 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11329 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11330 if (Subtarget.is64BitELFABI() &&
11331 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11332 !Subtarget.isUsingPCRelativeCalls()) {
11333 // Call lowering should have added an r2 operand to indicate a dependence
11334 // on the TOC base pointer value. It can't however, because there is no
11335 // way to mark the dependence as implicit there, and so the stackmap code
11336 // will confuse it with a regular operand. Instead, add the dependence
11337 // here.
11338 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11339 }
11340
11341 return emitPatchPoint(MI, BB);
11342 }
11343
11344 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11345 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11346 return emitEHSjLjSetJmp(MI, BB);
11347 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11348 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11349 return emitEHSjLjLongJmp(MI, BB);
11350 }
11351
11352 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11353
11354 // To "insert" these instructions we actually have to insert their
11355 // control-flow patterns.
11356 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11358
11359 MachineFunction *F = BB->getParent();
11360
11361 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11362 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11363 MI.getOpcode() == PPC::SELECT_I8) {
11365 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11366 MI.getOpcode() == PPC::SELECT_CC_I8)
11367 Cond.push_back(MI.getOperand(4));
11368 else
11370 Cond.push_back(MI.getOperand(1));
11371
11372 DebugLoc dl = MI.getDebugLoc();
11373 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11374 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11375 } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11376 MI.getOpcode() == PPC::SELECT_CC_F8 ||
11377 MI.getOpcode() == PPC::SELECT_CC_F16 ||
11378 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11379 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11380 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11381 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11382 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11383 MI.getOpcode() == PPC::SELECT_CC_SPE ||
11384 MI.getOpcode() == PPC::SELECT_F4 ||
11385 MI.getOpcode() == PPC::SELECT_F8 ||
11386 MI.getOpcode() == PPC::SELECT_F16 ||
11387 MI.getOpcode() == PPC::SELECT_SPE ||
11388 MI.getOpcode() == PPC::SELECT_SPE4 ||
11389 MI.getOpcode() == PPC::SELECT_VRRC ||
11390 MI.getOpcode() == PPC::SELECT_VSFRC ||
11391 MI.getOpcode() == PPC::SELECT_VSSRC ||
11392 MI.getOpcode() == PPC::SELECT_VSRC) {
11393 // The incoming instruction knows the destination vreg to set, the
11394 // condition code register to branch on, the true/false values to
11395 // select between, and a branch opcode to use.
11396
11397 // thisMBB:
11398 // ...
11399 // TrueVal = ...
11400 // cmpTY ccX, r1, r2
11401 // bCC copy1MBB
11402 // fallthrough --> copy0MBB
11404 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11405 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11406 DebugLoc dl = MI.getDebugLoc();
11407 F->insert(It, copy0MBB);
11408 F->insert(It, sinkMBB);
11409
11410 // Transfer the remainder of BB and its successor edges to sinkMBB.
11411 sinkMBB->splice(sinkMBB->begin(), BB,
11412 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11413 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11414
11415 // Next, add the true and fallthrough blocks as its successors.
11417 BB->addSuccessor(sinkMBB);
11418
11419 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11420 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11421 MI.getOpcode() == PPC::SELECT_F16 ||
11422 MI.getOpcode() == PPC::SELECT_SPE4 ||
11423 MI.getOpcode() == PPC::SELECT_SPE ||
11424 MI.getOpcode() == PPC::SELECT_VRRC ||
11425 MI.getOpcode() == PPC::SELECT_VSFRC ||
11426 MI.getOpcode() == PPC::SELECT_VSSRC ||
11427 MI.getOpcode() == PPC::SELECT_VSRC) {
11428 BuildMI(BB, dl, TII->get(PPC::BC))
11429 .addReg(MI.getOperand(1).getReg())
11430 .addMBB(sinkMBB);
11431 } else {
11432 unsigned SelectPred = MI.getOperand(4).getImm();
11433 BuildMI(BB, dl, TII->get(PPC::BCC))
11435 .addReg(MI.getOperand(1).getReg())
11436 .addMBB(sinkMBB);
11437 }
11438
11439 // copy0MBB:
11440 // %FalseValue = ...
11441 // # fallthrough to sinkMBB
11442 BB = copy0MBB;
11443
11444 // Update machine-CFG edges
11445 BB->addSuccessor(sinkMBB);
11446
11447 // sinkMBB:
11448 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11449 // ...
11450 BB = sinkMBB;
11451 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11452 .addReg(MI.getOperand(3).getReg())
11454 .addReg(MI.getOperand(2).getReg())
11455 .addMBB(thisMBB);
11456 } else if (MI.getOpcode() == PPC::ReadTB) {
11457 // To read the 64-bit time-base register on a 32-bit target, we read the
11458 // two halves. Should the counter have wrapped while it was being read, we
11459 // need to try again.
11460 // ...
11461 // readLoop:
11462 // mfspr Rx,TBU # load from TBU
11463 // mfspr Ry,TB # load from TB
11464 // mfspr Rz,TBU # load from TBU
11465 // cmpw crX,Rx,Rz # check if 'old'='new'
11466 // bne readLoop # branch if they're not equal
11467 // ...
11468
11469 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
11470 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11471 DebugLoc dl = MI.getDebugLoc();
11472 F->insert(It, readMBB);
11473 F->insert(It, sinkMBB);
11474
11475 // Transfer the remainder of BB and its successor edges to sinkMBB.
11476 sinkMBB->splice(sinkMBB->begin(), BB,
11477 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11478 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11479
11480 BB->addSuccessor(readMBB);
11481 BB = readMBB;
11482
11483 MachineRegisterInfo &RegInfo = F->getRegInfo();
11484 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11485 Register LoReg = MI.getOperand(0).getReg();
11486 Register HiReg = MI.getOperand(1).getReg();
11487
11488 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
11489 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
11490 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
11491
11492 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11493
11494 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
11495 .addReg(HiReg)
11497 BuildMI(BB, dl, TII->get(PPC::BCC))
11499 .addReg(CmpReg)
11500 .addMBB(readMBB);
11501
11502 BB->addSuccessor(readMBB);
11503 BB->addSuccessor(sinkMBB);
11504 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
11505 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
11506 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
11507 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
11508 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
11509 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
11510 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
11511 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
11512
11513 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
11514 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
11515 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
11516 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
11517 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
11518 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
11519 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
11520 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
11521
11522 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
11523 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
11524 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
11525 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
11526 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
11527 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
11528 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
11529 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
11530
11531 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
11532 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
11533 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
11534 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
11535 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
11536 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
11537 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
11538 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
11539
11540 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
11541 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
11542 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
11543 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
11544 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
11545 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
11546 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
11547 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
11548
11549 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
11550 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
11551 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
11552 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
11553 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
11554 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
11555 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
11556 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
11557
11558 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
11559 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
11560 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
11561 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
11562 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
11563 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
11564 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
11565 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
11566
11567 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
11568 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
11569 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
11570 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
11571 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
11572 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
11573 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
11574 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
11575
11576 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
11577 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
11578 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
11579 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
11580 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
11581 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
11582 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
11583 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
11584
11585 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
11586 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
11587 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
11588 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
11589 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
11590 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
11591 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
11592 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
11593
11594 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
11595 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
11596 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
11597 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
11598 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
11599 BB = EmitAtomicBinary(MI, BB, 4, 0);
11600 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
11601 BB = EmitAtomicBinary(MI, BB, 8, 0);
11602 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
11603 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
11604 (Subtarget.hasPartwordAtomics() &&
11605 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
11606 (Subtarget.hasPartwordAtomics() &&
11607 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
11608 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
11609
11610 auto LoadMnemonic = PPC::LDARX;
11611 auto StoreMnemonic = PPC::STDCX;
11612 switch (MI.getOpcode()) {
11613 default:
11614 llvm_unreachable("Compare and swap of unknown size");
11615 case PPC::ATOMIC_CMP_SWAP_I8:
11616 LoadMnemonic = PPC::LBARX;
11617 StoreMnemonic = PPC::STBCX;
11618 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11619 break;
11620 case PPC::ATOMIC_CMP_SWAP_I16:
11621 LoadMnemonic = PPC::LHARX;
11622 StoreMnemonic = PPC::STHCX;
11623 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11624 break;
11625 case PPC::ATOMIC_CMP_SWAP_I32:
11626 LoadMnemonic = PPC::LWARX;
11627 StoreMnemonic = PPC::STWCX;
11628 break;
11629 case PPC::ATOMIC_CMP_SWAP_I64:
11630 LoadMnemonic = PPC::LDARX;
11631 StoreMnemonic = PPC::STDCX;
11632 break;
11633 }
11634 Register dest = MI.getOperand(0).getReg();
11635 Register ptrA = MI.getOperand(1).getReg();
11636 Register ptrB = MI.getOperand(2).getReg();
11637 Register oldval = MI.getOperand(3).getReg();
11638 Register newval = MI.getOperand(4).getReg();
11639 DebugLoc dl = MI.getDebugLoc();
11640
11641 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11642 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11643 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11644 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11645 F->insert(It, loop1MBB);
11646 F->insert(It, loop2MBB);
11647 F->insert(It, midMBB);
11648 F->insert(It, exitMBB);
11649 exitMBB->splice(exitMBB->begin(), BB,
11650 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11651 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11652
11653 // thisMBB:
11654 // ...
11655 // fallthrough --> loopMBB
11657
11658 // loop1MBB:
11659 // l[bhwd]arx dest, ptr
11660 // cmp[wd] dest, oldval
11661 // bne- midMBB
11662 // loop2MBB:
11663 // st[bhwd]cx. newval, ptr
11664 // bne- loopMBB
11665 // b exitBB
11666 // midMBB:
11667 // st[bhwd]cx. dest, ptr
11668 // exitBB:
11669 BB = loop1MBB;
11670 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11671 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11672 .addReg(oldval)
11673 .addReg(dest);
11674 BuildMI(BB, dl, TII->get(PPC::BCC))
11676 .addReg(PPC::CR0)
11677 .addMBB(midMBB);
11679 BB->addSuccessor(midMBB);
11680
11681 BB = loop2MBB;
11682 BuildMI(BB, dl, TII->get(StoreMnemonic))
11683 .addReg(newval)
11684 .addReg(ptrA)
11685 .addReg(ptrB);
11686 BuildMI(BB, dl, TII->get(PPC::BCC))
11688 .addReg(PPC::CR0)
11689 .addMBB(loop1MBB);
11690 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11692 BB->addSuccessor(exitMBB);
11693
11694 BB = midMBB;
11695 BuildMI(BB, dl, TII->get(StoreMnemonic))
11696 .addReg(dest)
11697 .addReg(ptrA)
11698 .addReg(ptrB);
11699 BB->addSuccessor(exitMBB);
11700
11701 // exitMBB:
11702 // ...
11703 BB = exitMBB;
11704 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11705 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11706 // We must use 64-bit registers for addresses when targeting 64-bit,
11707 // since we're actually doing arithmetic on them. Other registers
11708 // can be 32-bit.
11709 bool is64bit = Subtarget.isPPC64();
11710 bool isLittleEndian = Subtarget.isLittleEndian();
11711 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11712
11713 Register dest = MI.getOperand(0).getReg();
11714 Register ptrA = MI.getOperand(1).getReg();
11715 Register ptrB = MI.getOperand(2).getReg();
11716 Register oldval = MI.getOperand(3).getReg();
11717 Register newval = MI.getOperand(4).getReg();
11718 DebugLoc dl = MI.getDebugLoc();
11719
11720 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11721 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11722 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11723 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11724 F->insert(It, loop1MBB);
11725 F->insert(It, loop2MBB);
11726 F->insert(It, midMBB);
11727 F->insert(It, exitMBB);
11728 exitMBB->splice(exitMBB->begin(), BB,
11729 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11730 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11731
11732 MachineRegisterInfo &RegInfo = F->getRegInfo();
11733 const TargetRegisterClass *RC =
11734 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11735 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11736
11737 Register PtrReg = RegInfo.createVirtualRegister(RC);
11738 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11739 Register ShiftReg =
11740 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11741 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
11742 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
11743 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
11744 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
11745 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11746 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11747 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11748 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11749 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11750 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11752 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
11753 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11754 // thisMBB:
11755 // ...
11756 // fallthrough --> loopMBB
11758
11759 // The 4-byte load must be aligned, while a char or short may be
11760 // anywhere in the word. Hence all this nasty bookkeeping code.
11761 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11762 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11763 // xori shift, shift1, 24 [16]
11764 // rlwinm ptr, ptr1, 0, 0, 29
11765 // slw newval2, newval, shift
11766 // slw oldval2, oldval,shift
11767 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11768 // slw mask, mask2, shift
11769 // and newval3, newval2, mask
11770 // and oldval3, oldval2, mask
11771 // loop1MBB:
11772 // lwarx tmpDest, ptr
11773 // and tmp, tmpDest, mask
11774 // cmpw tmp, oldval3
11775 // bne- midMBB
11776 // loop2MBB:
11777 // andc tmp2, tmpDest, mask
11778 // or tmp4, tmp2, newval3
11779 // stwcx. tmp4, ptr
11780 // bne- loop1MBB
11781 // b exitBB
11782 // midMBB:
11783 // stwcx. tmpDest, ptr
11784 // exitBB:
11785 // srw dest, tmpDest, shift
11786 if (ptrA != ZeroReg) {
11787 Ptr1Reg = RegInfo.createVirtualRegister(RC);
11788 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11789 .addReg(ptrA)
11790 .addReg(ptrB);
11791 } else {
11792 Ptr1Reg = ptrB;
11793 }
11794
11795 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
11796 // mode.
11797 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11798 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11799 .addImm(3)
11800 .addImm(27)
11801 .addImm(is8bit ? 28 : 27);
11802 if (!isLittleEndian)
11803 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11805 .addImm(is8bit ? 24 : 16);
11806 if (is64bit)
11807 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11808 .addReg(Ptr1Reg)
11809 .addImm(0)
11810 .addImm(61);
11811 else
11812 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11813 .addReg(Ptr1Reg)
11814 .addImm(0)
11815 .addImm(0)
11816 .addImm(29);
11817 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
11818 .addReg(newval)
11819 .addReg(ShiftReg);
11820 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
11821 .addReg(oldval)
11822 .addReg(ShiftReg);
11823 if (is8bit)
11824 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11825 else {
11826 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11827 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11829 .addImm(65535);
11830 }
11831 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11833 .addReg(ShiftReg);
11834 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
11836 .addReg(MaskReg);
11837 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
11839 .addReg(MaskReg);
11840
11841 BB = loop1MBB;
11842 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11843 .addReg(ZeroReg)
11844 .addReg(PtrReg);
11845 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
11847 .addReg(MaskReg);
11848 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
11849 .addReg(TmpReg)
11851 BuildMI(BB, dl, TII->get(PPC::BCC))
11853 .addReg(PPC::CR0)
11854 .addMBB(midMBB);
11856 BB->addSuccessor(midMBB);
11857
11858 BB = loop2MBB;
11859 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11861 .addReg(MaskReg);
11862 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
11863 .addReg(Tmp2Reg)
11865 BuildMI(BB, dl, TII->get(PPC::STWCX))
11866 .addReg(Tmp4Reg)
11867 .addReg(ZeroReg)
11868 .addReg(PtrReg);
11869 BuildMI(BB, dl, TII->get(PPC::BCC))
11871 .addReg(PPC::CR0)
11872 .addMBB(loop1MBB);
11873 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11875 BB->addSuccessor(exitMBB);
11876
11877 BB = midMBB;
11878 BuildMI(BB, dl, TII->get(PPC::STWCX))
11880 .addReg(ZeroReg)
11881 .addReg(PtrReg);
11882 BB->addSuccessor(exitMBB);
11883
11884 // exitMBB:
11885 // ...
11886 BB = exitMBB;
11887 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11888 .addReg(TmpReg)
11889 .addReg(ShiftReg);
11890 } else if (MI.getOpcode() == PPC::FADDrtz) {
11891 // This pseudo performs an FADD with rounding mode temporarily forced
11892 // to round-to-zero. We emit this via custom inserter since the FPSCR
11893 // is not modeled at the SelectionDAG level.
11894 Register Dest = MI.getOperand(0).getReg();
11895 Register Src1 = MI.getOperand(1).getReg();
11896 Register Src2 = MI.getOperand(2).getReg();
11897 DebugLoc dl = MI.getDebugLoc();
11898
11899 MachineRegisterInfo &RegInfo = F->getRegInfo();
11900 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11901
11902 // Save FPSCR value.
11903 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
11904
11905 // Set rounding mode to round-to-zero.
11906 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
11907 .addImm(31)
11909
11910 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
11911 .addImm(30)
11913
11914 // Perform addition.
11915 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
11916 .addReg(Src1)
11917 .addReg(Src2);
11918 if (MI.getFlag(MachineInstr::NoFPExcept))
11920
11921 // Restore FPSCR value.
11922 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
11923 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
11924 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
11925 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
11926 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
11927 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
11928 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
11929 ? PPC::ANDI8_rec
11930 : PPC::ANDI_rec;
11931 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
11932 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
11933
11934 MachineRegisterInfo &RegInfo = F->getRegInfo();
11935 Register Dest = RegInfo.createVirtualRegister(
11936 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
11937
11938 DebugLoc Dl = MI.getDebugLoc();
11939 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
11940 .addReg(MI.getOperand(1).getReg())
11941 .addImm(1);
11942 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11943 MI.getOperand(0).getReg())
11944 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
11945 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
11946 DebugLoc Dl = MI.getDebugLoc();
11947 MachineRegisterInfo &RegInfo = F->getRegInfo();
11948 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11949 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
11950 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11951 MI.getOperand(0).getReg())
11952 .addReg(CRReg);
11953 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
11954 DebugLoc Dl = MI.getDebugLoc();
11955 unsigned Imm = MI.getOperand(1).getImm();
11956 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
11957 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11958 MI.getOperand(0).getReg())
11959 .addReg(PPC::CR0EQ);
11960 } else if (MI.getOpcode() == PPC::SETRNDi) {
11961 DebugLoc dl = MI.getDebugLoc();
11962 Register OldFPSCRReg = MI.getOperand(0).getReg();
11963
11964 // Save FPSCR value.
11965 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11966
11967 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
11968 // the following settings:
11969 // 00 Round to nearest
11970 // 01 Round to 0
11971 // 10 Round to +inf
11972 // 11 Round to -inf
11973
11974 // When the operand is immediate, using the two least significant bits of
11975 // the immediate to set the bits 62:63 of FPSCR.
11976 unsigned Mode = MI.getOperand(1).getImm();
11977 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
11978 .addImm(31)
11980
11981 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
11982 .addImm(30)
11984 } else if (MI.getOpcode() == PPC::SETRND) {
11985 DebugLoc dl = MI.getDebugLoc();
11986
11987 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
11988 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
11989 // If the target doesn't have DirectMove, we should use stack to do the
11990 // conversion, because the target doesn't have the instructions like mtvsrd
11991 // or mfvsrd to do this conversion directly.
11992 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
11993 if (Subtarget.hasDirectMove()) {
11994 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
11995 .addReg(SrcReg);
11996 } else {
11997 // Use stack to do the register copy.
11998 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
11999 MachineRegisterInfo &RegInfo = F->getRegInfo();
12000 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12001 if (RC == &PPC::F8RCRegClass) {
12002 // Copy register from F8RCRegClass to G8RCRegclass.
12003 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12004 "Unsupported RegClass.");
12005
12006 StoreOp = PPC::STFD;
12007 LoadOp = PPC::LD;
12008 } else {
12009 // Copy register from G8RCRegClass to F8RCRegclass.
12010 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12011 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12012 "Unsupported RegClass.");
12013 }
12014
12015 MachineFrameInfo &MFI = F->getFrameInfo();
12016 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12017
12018 MachineMemOperand *MMOStore = F->getMachineMemOperand(
12019 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12021 MFI.getObjectAlign(FrameIdx));
12022
12023 // Store the SrcReg into the stack.
12024 BuildMI(*BB, MI, dl, TII->get(StoreOp))
12025 .addReg(SrcReg)
12026 .addImm(0)
12027 .addFrameIndex(FrameIdx)
12029
12030 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12031 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12033 MFI.getObjectAlign(FrameIdx));
12034
12035 // Load from the stack where SrcReg is stored, and save to DestReg,
12036 // so we have done the RegClass conversion from RegClass::SrcReg to
12037 // RegClass::DestReg.
12038 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12039 .addImm(0)
12040 .addFrameIndex(FrameIdx)
12042 }
12043 };
12044
12045 Register OldFPSCRReg = MI.getOperand(0).getReg();
12046
12047 // Save FPSCR value.
12048 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12049
12050 // When the operand is gprc register, use two least significant bits of the
12051 // register and mtfsf instruction to set the bits 62:63 of FPSCR.
12052 //
12053 // copy OldFPSCRTmpReg, OldFPSCRReg
12054 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12055 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12056 // copy NewFPSCRReg, NewFPSCRTmpReg
12057 // mtfsf 255, NewFPSCRReg
12058 MachineOperand SrcOp = MI.getOperand(1);
12059 MachineRegisterInfo &RegInfo = F->getRegInfo();
12060 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12061
12063
12064 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12065 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12066
12067 // The first operand of INSERT_SUBREG should be a register which has
12068 // subregisters, we only care about its RegClass, so we should use an
12069 // IMPLICIT_DEF register.
12070 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12071 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12073 .add(SrcOp)
12074 .addImm(1);
12075
12076 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12077 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12080 .addImm(0)
12081 .addImm(62);
12082
12083 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12085
12086 // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
12087 // bits of FPSCR.
12088 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12089 .addImm(255)
12091 .addImm(0)
12092 .addImm(0);
12093 } else if (MI.getOpcode() == PPC::SETFLM) {
12094 DebugLoc Dl = MI.getDebugLoc();
12095
12096 // Result of setflm is previous FPSCR content, so we need to save it first.
12097 Register OldFPSCRReg = MI.getOperand(0).getReg();
12098 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12099
12100 // Put bits in 32:63 to FPSCR.
12101 Register NewFPSCRReg = MI.getOperand(1).getReg();
12102 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12103 .addImm(255)
12105 .addImm(0)
12106 .addImm(0);
12107 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12108 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12109 return emitProbedAlloca(MI, BB);
12110 } else {
12111 llvm_unreachable("Unexpected instr type to insert");
12112 }
12113
12114 MI.eraseFromParent(); // The pseudo instruction is gone now.
12115 return BB;
12116}
12117
12118//===----------------------------------------------------------------------===//
12119// Target Optimization Hooks
12120//===----------------------------------------------------------------------===//
12121
12122static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12123 // For the estimates, convergence is quadratic, so we essentially double the
12124 // number of digits correct after every iteration. For both FRE and FRSQRTE,
12125 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12126 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
12127 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12128 if (VT.getScalarType() == MVT::f64)
12130 return RefinementSteps;
12131}
12132
12133SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
12134 const DenormalMode &Mode) const {
12135 // We only have VSX Vector Test for software Square Root.
12136 EVT VT = Op.getValueType();
12137 if (!isTypeLegal(MVT::i1) ||
12138 (VT != MVT::f64 &&
12139 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
12140 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
12141
12142 SDLoc DL(Op);
12143 // The output register of FTSQRT is CR field.
12145 // ftsqrt BF,FRB
12146 // Let e_b be the unbiased exponent of the double-precision
12147 // floating-point operand in register FRB.
12148 // fe_flag is set to 1 if either of the following conditions occurs.
12149 // - The double-precision floating-point operand in register FRB is a zero,
12150 // a NaN, or an infinity, or a negative value.
12151 // - e_b is less than or equal to -970.
12152 // Otherwise fe_flag is set to 0.
12153 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
12154 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
12155 // exponent is less than -970)
12156 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
12157 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
12158 FTSQRT, SRIdxVal),
12159 0);
12160}
12161
12162SDValue
12163PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
12164 SelectionDAG &DAG) const {
12165 // We only have VSX Vector Square Root.
12166 EVT VT = Op.getValueType();
12167 if (VT != MVT::f64 &&
12168 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
12170
12171 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
12172}
12173
12174SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12175 int Enabled, int &RefinementSteps,
12176 bool &UseOneConstNR,
12177 bool Reciprocal) const {
12178 EVT VT = Operand.getValueType();
12179 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12180 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12181 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12182 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12185
12186 // The Newton-Raphson computation with a single constant does not provide
12187 // enough accuracy on some CPUs.
12188 UseOneConstNR = !Subtarget.needsTwoConstNR();
12189 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12190 }
12191 return SDValue();
12192}
12193
12194SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12195 int Enabled,
12196 int &RefinementSteps) const {
12197 EVT VT = Operand.getValueType();
12198 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12199 (VT == MVT::f64 && Subtarget.hasFRE()) ||
12200 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12201 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12204 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12205 }
12206 return SDValue();
12207}
12208
12209unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12210 // Note: This functionality is used only when unsafe-fp-math is enabled, and
12211 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12212 // enabled for division), this functionality is redundant with the default
12213 // combiner logic (once the division -> reciprocal/multiply transformation
12214 // has taken place). As a result, this matters more for older cores than for
12215 // newer ones.
12216
12217 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12218 // reciprocal if there are two or more FDIVs (for embedded cores with only
12219 // one FP pipeline) for three or more FDIVs (for generic OOO cores).
12220 switch (Subtarget.getCPUDirective()) {
12221 default:
12222 return 3;
12223 case PPC::DIR_440:
12224 case PPC::DIR_A2:
12225 case PPC::DIR_E500:
12226 case PPC::DIR_E500mc:
12227 case PPC::DIR_E5500:
12228 return 2;
12229 }
12230}
12231
12232// isConsecutiveLSLoc needs to work even if all adds have not yet been
12233// collapsed, and so we need to look through chains of them.
12235 int64_t& Offset, SelectionDAG &DAG) {
12236 if (DAG.isBaseWithConstantOffset(Loc)) {
12237 Base = Loc.getOperand(0);
12238 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12239
12240 // The base might itself be a base plus an offset, and if so, accumulate
12241 // that as well.
12242 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12243 }
12244}
12245
12246static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12247 unsigned Bytes, int Dist,
12248 SelectionDAG &DAG) {
12249 if (VT.getSizeInBits() / 8 != Bytes)
12250 return false;
12251
12252 SDValue BaseLoc = Base->getBasePtr();
12253 if (Loc.getOpcode() == ISD::FrameIndex) {
12254 if (BaseLoc.getOpcode() != ISD::FrameIndex)
12255 return false;
12257 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
12258 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12259 int FS = MFI.getObjectSize(FI);
12260 int BFS = MFI.getObjectSize(BFI);
12261 if (FS != BFS || FS != (int)Bytes) return false;
12262 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12263 }
12264
12265 SDValue Base1 = Loc, Base2 = BaseLoc;
12266 int64_t Offset1 = 0, Offset2 = 0;
12269 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12270 return true;
12271
12272 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12273 const GlobalValue *GV1 = nullptr;
12274 const GlobalValue *GV2 = nullptr;
12275 Offset1 = 0;
12276 Offset2 = 0;
12277 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12278 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12279 if (isGA1 && isGA2 && GV1 == GV2)
12280 return Offset1 == (Offset2 + Dist*Bytes);
12281 return false;
12282}
12283
12284// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12285// not enforce equality of the chain operands.
12287 unsigned Bytes, int Dist,
12288 SelectionDAG &DAG) {
12290 EVT VT = LS->getMemoryVT();
12291 SDValue Loc = LS->getBasePtr();
12292 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12293 }
12294
12295 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12296 EVT VT;
12297 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12298 default: return false;
12299 case Intrinsic::ppc_altivec_lvx:
12300 case Intrinsic::ppc_altivec_lvxl:
12301 case Intrinsic::ppc_vsx_lxvw4x:
12302 case Intrinsic::ppc_vsx_lxvw4x_be:
12303 VT = MVT::v4i32;
12304 break;
12305 case Intrinsic::ppc_vsx_lxvd2x:
12306 case Intrinsic::ppc_vsx_lxvd2x_be:
12307 VT = MVT::v2f64;
12308 break;
12309 case Intrinsic::ppc_altivec_lvebx:
12310 VT = MVT::i8;
12311 break;
12312 case Intrinsic::ppc_altivec_lvehx:
12313 VT = MVT::i16;
12314 break;
12315 case Intrinsic::ppc_altivec_lvewx:
12316 VT = MVT::i32;
12317 break;
12318 }
12319
12320 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12321 }
12322
12323 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12324 EVT VT;
12325 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12326 default: return false;
12327 case Intrinsic::ppc_altivec_stvx:
12328 case Intrinsic::ppc_altivec_stvxl:
12329 case Intrinsic::ppc_vsx_stxvw4x:
12330 VT = MVT::v4i32;
12331 break;
12332 case Intrinsic::ppc_vsx_stxvd2x:
12333 VT = MVT::v2f64;
12334 break;
12335 case Intrinsic::ppc_vsx_stxvw4x_be:
12336 VT = MVT::v4i32;
12337 break;
12338 case Intrinsic::ppc_vsx_stxvd2x_be:
12339 VT = MVT::v2f64;
12340 break;
12341 case Intrinsic::ppc_altivec_stvebx:
12342 VT = MVT::i8;
12343 break;
12344 case Intrinsic::ppc_altivec_stvehx:
12345 VT = MVT::i16;
12346 break;
12347 case Intrinsic::ppc_altivec_stvewx:
12348 VT = MVT::i32;
12349 break;
12350 }
12351
12352 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12353 }
12354
12355 return false;
12356}
12357
12358// Return true is there is a nearyby consecutive load to the one provided
12359// (regardless of alignment). We search up and down the chain, looking though
12360// token factors and other loads (but nothing else). As a result, a true result
12361// indicates that it is safe to create a new consecutive load adjacent to the
12362// load provided.
12364 SDValue Chain = LD->getChain();
12365 EVT VT = LD->getMemoryVT();
12366
12368 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12369 SmallSet<SDNode *, 16> Visited;
12370
12371 // First, search up the chain, branching to follow all token-factor operands.
12372 // If we find a consecutive load, then we're done, otherwise, record all
12373 // nodes just above the top-level loads and token factors.
12374 while (!Queue.empty()) {
12375 SDNode *ChainNext = Queue.pop_back_val();
12376 if (!Visited.insert(ChainNext).second)
12377 continue;
12378
12380 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12381 return true;
12382
12383 if (!Visited.count(ChainLD->getChain().getNode()))
12384 Queue.push_back(ChainLD->getChain().getNode());
12385 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12386 for (const SDUse &O : ChainNext->ops())
12387 if (!Visited.count(O.getNode()))
12388 Queue.push_back(O.getNode());
12389 } else
12390 LoadRoots.insert(ChainNext);
12391 }
12392
12393 // Second, search down the chain, starting from the top-level nodes recorded
12394 // in the first phase. These top-level nodes are the nodes just above all
12395 // loads and token factors. Starting with their uses, recursively look though
12396 // all loads (just the chain uses) and token factors to find a consecutive
12397 // load.
12398 Visited.clear();
12399 Queue.clear();
12400
12402 IE = LoadRoots.end(); I != IE; ++I) {
12403 Queue.push_back(*I);
12404
12405 while (!Queue.empty()) {
12406 SDNode *LoadRoot = Queue.pop_back_val();
12407 if (!Visited.insert(LoadRoot).second)
12408 continue;
12409
12411 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12412 return true;
12413
12414 for (SDNode::use_iterator UI = LoadRoot->use_begin(),
12415 UE = LoadRoot->use_end(); UI != UE; ++UI)
12416 if (((isa<MemSDNode>(*UI) &&
12417 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12418 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
12419 Queue.push_back(*UI);
12420 }
12421 }
12422
12423 return false;
12424}
12425
12426/// This function is called when we have proved that a SETCC node can be replaced
12427/// by subtraction (and other supporting instructions) so that the result of
12428/// comparison is kept in a GPR instead of CR. This function is purely for
12429/// codegen purposes and has some flags to guide the codegen process.
12431 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
12432 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12433
12434 // Zero extend the operands to the largest legal integer. Originally, they
12435 // must be of a strictly smaller size.
12436 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
12437 DAG.getConstant(Size, DL, MVT::i32));
12438 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
12439 DAG.getConstant(Size, DL, MVT::i32));
12440
12441 // Swap if needed. Depends on the condition code.
12442 if (Swap)
12443 std::swap(Op0, Op1);
12444
12445 // Subtract extended integers.
12446 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
12447
12448 // Move the sign bit to the least significant position and zero out the rest.
12449 // Now the least significant bit carries the result of original comparison.
12450 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
12451 DAG.getConstant(Size - 1, DL, MVT::i32));
12452 auto Final = Shifted;
12453
12454 // Complement the result if needed. Based on the condition code.
12455 if (Complement)
12456 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
12457 DAG.getConstant(1, DL, MVT::i64));
12458
12459 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
12460}
12461
12462SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
12463 DAGCombinerInfo &DCI) const {
12464 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12465
12466 SelectionDAG &DAG = DCI.DAG;
12467 SDLoc DL(N);
12468
12469 // Size of integers being compared has a critical role in the following
12470 // analysis, so we prefer to do this when all types are legal.
12471 if (!DCI.isAfterLegalizeDAG())
12472 return SDValue();
12473
12474 // If all users of SETCC extend its value to a legal integer type
12475 // then we replace SETCC with a subtraction
12476 for (SDNode::use_iterator UI = N->use_begin(),
12477 UE = N->use_end(); UI != UE; ++UI) {
12478 if (UI->getOpcode() != ISD::ZERO_EXTEND)
12479 return SDValue();
12480 }
12481
12482 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12483 auto OpSize = N->getOperand(0).getValueSizeInBits();
12484
12486
12487 if (OpSize < Size) {
12488 switch (CC) {
12489 default: break;
12490 case ISD::SETULT:
12491 return generateEquivalentSub(N, Size, false, false, DL, DAG);
12492 case ISD::SETULE:
12493 return generateEquivalentSub(N, Size, true, true, DL, DAG);
12494 case ISD::SETUGT:
12495 return generateEquivalentSub(N, Size, false, true, DL, DAG);
12496 case ISD::SETUGE:
12497 return generateEquivalentSub(N, Size, true, false, DL, DAG);
12498 }
12499 }
12500
12501 return SDValue();
12502}
12503
12504SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
12505 DAGCombinerInfo &DCI) const {
12506 SelectionDAG &DAG = DCI.DAG;
12507 SDLoc dl(N);
12508
12509 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
12510 // If we're tracking CR bits, we need to be careful that we don't have:
12511 // trunc(binary-ops(zext(x), zext(y)))
12512 // or
12513 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
12514 // such that we're unnecessarily moving things into GPRs when it would be
12515 // better to keep them in CR bits.
12516
12517 // Note that trunc here can be an actual i1 trunc, or can be the effective
12518 // truncation that comes from a setcc or select_cc.
12519 if (N->getOpcode() == ISD::TRUNCATE &&
12520 N->getValueType(0) != MVT::i1)
12521 return SDValue();
12522
12523 if (N->getOperand(0).getValueType() != MVT::i32 &&
12524 N->getOperand(0).getValueType() != MVT::i64)
12525 return SDValue();
12526
12527 if (N->getOpcode() == ISD::SETCC ||
12528 N->getOpcode() == ISD::SELECT_CC) {
12529 // If we're looking at a comparison, then we need to make sure that the
12530 // high bits (all except for the first) don't matter the result.
12531 ISD::CondCode CC =
12532 cast<CondCodeSDNode>(N->getOperand(
12533 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
12534 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
12535
12536 if (ISD::isSignedIntSetCC(CC)) {
12537 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
12538 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
12539 return SDValue();
12540 } else if (ISD::isUnsignedIntSetCC(CC)) {
12541 if (!DAG.MaskedValueIsZero(N->getOperand(0),
12543 !DAG.MaskedValueIsZero(N->getOperand(1),
12545 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
12546 : SDValue());
12547 } else {
12548 // This is neither a signed nor an unsigned comparison, just make sure
12549 // that the high bits are equal.
12550 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
12551 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
12552
12553 // We don't really care about what is known about the first bit (if
12554 // anything), so pretend that it is known zero for both to ensure they can
12555 // be compared as constants.
12556 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
12557 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
12558
12559 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
12560 Op1Known.getConstant() != Op2Known.getConstant())
12561 return SDValue();
12562 }
12563 }
12564
12565 // We now know that the higher-order bits are irrelevant, we just need to
12566 // make sure that all of the intermediate operations are bit operations, and
12567 // all inputs are extensions.
12568 if (N->getOperand(0).getOpcode() != ISD::AND &&
12569 N->getOperand(0).getOpcode() != ISD::OR &&
12570 N->getOperand(0).getOpcode() != ISD::XOR &&
12571 N->getOperand(0).getOpcode() != ISD::SELECT &&
12572 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
12573 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
12574 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
12575 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
12576 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
12577 return SDValue();
12578
12579 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
12580 N->getOperand(1).getOpcode() != ISD::AND &&
12581 N->getOperand(1).getOpcode() != ISD::OR &&
12582 N->getOperand(1).getOpcode() != ISD::XOR &&
12583 N->getOperand(1).getOpcode() != ISD::SELECT &&
12584 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
12585 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
12586 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
12587 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
12588 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
12589 return SDValue();
12590
12594
12595 for (unsigned i = 0; i < 2; ++i) {
12596 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12597 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12598 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
12599 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
12600 isa<ConstantSDNode>(N->getOperand(i)))
12601 Inputs.push_back(N->getOperand(i));
12602 else
12603 BinOps.push_back(N->getOperand(i));
12604
12605 if (N->getOpcode() == ISD::TRUNCATE)
12606 break;
12607 }
12608
12609 // Visit all inputs, collect all binary operations (and, or, xor and
12610 // select) that are all fed by extensions.
12611 while (!BinOps.empty()) {
12612 SDValue BinOp = BinOps.pop_back_val();
12613
12614 if (!Visited.insert(BinOp.getNode()).second)
12615 continue;
12616
12617 PromOps.push_back(BinOp);
12618
12619 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12620 // The condition of the select is not promoted.
12621 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12622 continue;
12623 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12624 continue;
12625
12626 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12627 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12628 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
12629 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
12631 Inputs.push_back(BinOp.getOperand(i));
12632 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12633 BinOp.getOperand(i).getOpcode() == ISD::OR ||
12634 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12635 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12636 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
12637 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12638 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12639 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12640 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
12641 BinOps.push_back(BinOp.getOperand(i));
12642 } else {
12643 // We have an input that is not an extension or another binary
12644 // operation; we'll abort this transformation.
12645 return SDValue();
12646 }
12647 }
12648 }
12649
12650 // Make sure that this is a self-contained cluster of operations (which
12651 // is not quite the same thing as saying that everything has only one
12652 // use).
12653 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12654 if (isa<ConstantSDNode>(Inputs[i]))
12655 continue;
12656
12657 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12658 UE = Inputs[i].getNode()->use_end();
12659 UI != UE; ++UI) {
12660 SDNode *User = *UI;
12661 if (User != N && !Visited.count(User))
12662 return SDValue();
12663
12664 // Make sure that we're not going to promote the non-output-value
12665 // operand(s) or SELECT or SELECT_CC.
12666 // FIXME: Although we could sometimes handle this, and it does occur in
12667 // practice that one of the condition inputs to the select is also one of
12668 // the outputs, we currently can't deal with this.
12669 if (User->getOpcode() == ISD::SELECT) {
12670 if (User->getOperand(0) == Inputs[i])
12671 return SDValue();
12672 } else if (User->getOpcode() == ISD::SELECT_CC) {
12673 if (User->getOperand(0) == Inputs[i] ||
12674 User->getOperand(1) == Inputs[i])
12675 return SDValue();
12676 }
12677 }
12678 }
12679
12680 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12681 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12682 UE = PromOps[i].getNode()->use_end();
12683 UI != UE; ++UI) {
12684 SDNode *User = *UI;
12685 if (User != N && !Visited.count(User))
12686 return SDValue();
12687
12688 // Make sure that we're not going to promote the non-output-value
12689 // operand(s) or SELECT or SELECT_CC.
12690 // FIXME: Although we could sometimes handle this, and it does occur in
12691 // practice that one of the condition inputs to the select is also one of
12692 // the outputs, we currently can't deal with this.
12693 if (User->getOpcode() == ISD::SELECT) {
12694 if (User->getOperand(0) == PromOps[i])
12695 return SDValue();
12696 } else if (User->getOpcode() == ISD::SELECT_CC) {
12697 if (User->getOperand(0) == PromOps[i] ||
12698 User->getOperand(1) == PromOps[i])
12699 return SDValue();
12700 }
12701 }
12702 }
12703
12704 // Replace all inputs with the extension operand.
12705 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12706 // Constants may have users outside the cluster of to-be-promoted nodes,
12707 // and so we need to replace those as we do the promotions.
12708 if (isa<ConstantSDNode>(Inputs[i]))
12709 continue;
12710 else
12711 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
12712 }
12713
12714 std::list<HandleSDNode> PromOpHandles;
12715 for (auto &PromOp : PromOps)
12716 PromOpHandles.emplace_back(PromOp);
12717
12718 // Replace all operations (these are all the same, but have a different
12719 // (i1) return type). DAG.getNode will validate that the types of
12720 // a binary operator match, so go through the list in reverse so that
12721 // we've likely promoted both operands first. Any intermediate truncations or
12722 // extensions disappear.
12723 while (!PromOpHandles.empty()) {
12724 SDValue PromOp = PromOpHandles.back().getValue();
12725 PromOpHandles.pop_back();
12726
12727 if (PromOp.getOpcode() == ISD::TRUNCATE ||
12728 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
12729 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
12730 PromOp.getOpcode() == ISD::ANY_EXTEND) {
12731 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
12732 PromOp.getOperand(0).getValueType() != MVT::i1) {
12733 // The operand is not yet ready (see comment below).
12734 PromOpHandles.emplace_front(PromOp);
12735 continue;
12736 }
12737
12738 SDValue RepValue = PromOp.getOperand(0);
12741
12743 continue;
12744 }
12745
12746 unsigned C;
12747 switch (PromOp.getOpcode()) {
12748 default: C = 0; break;
12749 case ISD::SELECT: C = 1; break;
12750 case ISD::SELECT_CC: C = 2; break;
12751 }
12752
12753 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12754 PromOp.getOperand(C).getValueType() != MVT::i1) ||
12755 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12756 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
12757 // The to-be-promoted operands of this node have not yet been
12758 // promoted (this should be rare because we're going through the
12759 // list backward, but if one of the operands has several users in
12760 // this cluster of to-be-promoted nodes, it is possible).
12761 PromOpHandles.emplace_front(PromOp);
12762 continue;
12763 }
12764
12765 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
12766 PromOp.getNode()->op_end());
12767
12768 // If there are any constant inputs, make sure they're replaced now.
12769 for (unsigned i = 0; i < 2; ++i)
12770 if (isa<ConstantSDNode>(Ops[C+i]))
12771 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
12772
12774 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
12775 }
12776
12777 // Now we're left with the initial truncation itself.
12778 if (N->getOpcode() == ISD::TRUNCATE)
12779 return N->getOperand(0);
12780
12781 // Otherwise, this is a comparison. The operands to be compared have just
12782 // changed type (to i1), but everything else is the same.
12783 return SDValue(N, 0);
12784}
12785
/// Tries to fold an integer extension N whose operand is a cluster of
/// AND/OR/XOR/SELECT/SELECT_CC nodes by rebuilding that cluster directly in
/// N's result type, so the values no longer pass through the narrow type.
///
/// Only i32/i64 results are handled, and only when the operand is i1 with CR
/// bits in use, or i32 on PPC64. Returns an empty SDValue when the pattern
/// does not match. On success returns the promoted cluster root, wrapped in an
/// AND mask (ZERO_EXTEND) or an SHL/SRA pair (SIGN_EXTEND) when the high bits
/// of the inputs are not already known to have the required form.
12786SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
12787 DAGCombinerInfo &DCI) const {
12788 SelectionDAG &DAG = DCI.DAG;
12789 SDLoc dl(N);
12790
12791 // If we're tracking CR bits, we need to be careful that we don't have:
12792 // zext(binary-ops(trunc(x), trunc(y)))
12793 // or
12794 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
12795 // such that we're unnecessarily moving things into CR bits that can more
12796 // efficiently stay in GPRs. Note that if we're not certain that the high
12797 // bits are set as required by the final extension, we still may need to do
12798 // some masking to get the proper behavior.
12799
12800 // This same functionality is important on PPC64 when dealing with
12801 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
12802 // the return values of functions. Because it is so similar, it is handled
12803 // here as well.
12804
12805 if (N->getValueType(0) != MVT::i32 &&
12806 N->getValueType(0) != MVT::i64)
12807 return SDValue();
12808
12809 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
12810 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
12811 return SDValue();
12812
// The extended value itself must be one of the promotable operations.
12813 if (N->getOperand(0).getOpcode() != ISD::AND &&
12814 N->getOperand(0).getOpcode() != ISD::OR &&
12815 N->getOperand(0).getOpcode() != ISD::XOR &&
12816 N->getOperand(0).getOpcode() != ISD::SELECT &&
12817 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
12818 return SDValue();
12819
// NOTE(review): the declarations of `Inputs` and `Visited`, both used below,
// are not visible in this listing (embedded numbers 12820 and 12822 are
// skipped).
12821 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
12823
12824 // Visit all inputs, collect all binary operations (and, or, xor and
12825 // select) that are all fed by truncations.
12826 while (!BinOps.empty()) {
12827 SDValue BinOp = BinOps.pop_back_val();
12828
12829 if (!Visited.insert(BinOp.getNode()).second)
12830 continue;
12831
12832 PromOps.push_back(BinOp);
12833
12834 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12835 // The condition of the select is not promoted.
12836 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12837 continue;
// For SELECT_CC only operands 2 and 3 (the selected values) are walked.
12838 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12839 continue;
12840
12841 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
// NOTE(review): the second half of this condition is missing from the
// listing (12842 is skipped). Judging by the isa<ConstantSDNode> checks on
// Inputs further down, constants presumably also land in Inputs.
12843 Inputs.push_back(BinOp.getOperand(i));
12844 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12845 BinOp.getOperand(i).getOpcode() == ISD::OR ||
12846 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12847 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12848 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
12849 BinOps.push_back(BinOp.getOperand(i));
12850 } else {
12851 // We have an input that is not a truncation or another binary
12852 // operation; we'll abort this transformation.
12853 return SDValue();
12854 }
12855 }
12856 }
12857
12858 // The operands of a select that must be truncated when the select is
12859 // promoted because the operand is actually part of the to-be-promoted set.
// NOTE(review): the declaration of SelectTruncOp is missing here (12860 is
// skipped). From its uses below it is a two-element array of maps, keyed by
// the user node, holding the original value type of that user's operand 0
// (index 0) or operand 1 (index 1).
12861
12862 // Make sure that this is a self-contained cluster of operations (which
12863 // is not quite the same thing as saying that everything has only one
12864 // use).
12865 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12866 if (isa<ConstantSDNode>(Inputs[i]))
12867 continue;
12868
12869 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12870 UE = Inputs[i].getNode()->use_end();
12871 UI != UE; ++UI) {
12872 SDNode *User = *UI;
// Any user outside the visited cluster (other than N) blocks the combine.
12873 if (User != N && !Visited.count(User))
12874 return SDValue();
12875
12876 // If we're going to promote the non-output-value operand(s) or SELECT or
12877 // SELECT_CC, record them for truncation.
12878 if (User->getOpcode() == ISD::SELECT) {
12879 if (User->getOperand(0) == Inputs[i])
12880 SelectTruncOp[0].insert(std::make_pair(User,
12881 User->getOperand(0).getValueType()));
12882 } else if (User->getOpcode() == ISD::SELECT_CC) {
12883 if (User->getOperand(0) == Inputs[i])
12884 SelectTruncOp[0].insert(std::make_pair(User,
12885 User->getOperand(0).getValueType()));
12886 if (User->getOperand(1) == Inputs[i])
12887 SelectTruncOp[1].insert(std::make_pair(User,
12888 User->getOperand(1).getValueType()));
12889 }
12890 }
12891 }
12892
// Same self-containment check and bookkeeping, now for the operations that
// will themselves be promoted.
12893 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12894 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12895 UE = PromOps[i].getNode()->use_end();
12896 UI != UE; ++UI) {
12897 SDNode *User = *UI;
12898 if (User != N && !Visited.count(User))
12899 return SDValue();
12900
12901 // If we're going to promote the non-output-value operand(s) or SELECT or
12902 // SELECT_CC, record them for truncation.
12903 if (User->getOpcode() == ISD::SELECT) {
12904 if (User->getOperand(0) == PromOps[i])
12905 SelectTruncOp[0].insert(std::make_pair(User,
12906 User->getOperand(0).getValueType()));
12907 } else if (User->getOpcode() == ISD::SELECT_CC) {
12908 if (User->getOperand(0) == PromOps[i])
12909 SelectTruncOp[0].insert(std::make_pair(User,
12910 User->getOperand(0).getValueType()));
12911 if (User->getOperand(1) == PromOps[i])
12912 SelectTruncOp[1].insert(std::make_pair(User,
12913 User->getOperand(1).getValueType()));
12914 }
12915 }
12916 }
12917
// PromBits is the width of the narrow type the cluster currently computes in.
12918 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
12919 bool ReallyNeedsExt = false;
12920 if (N->getOpcode() != ISD::ANY_EXTEND) {
12921 // If all of the inputs are not already sign/zero extended, then
12922 // we'll still need to do that at the end.
12923 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12924 if (isa<ConstantSDNode>(Inputs[i]))
12925 continue;
12926
12927 unsigned OpBits =
12928 Inputs[i].getOperand(0).getValueSizeInBits();
12929 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
12930
// For sign extension the pre-truncation value needs at least
// OpBits - PromBits + 1 sign bits, i.e. every bit above the low PromBits
// must replicate bit PromBits - 1.
// NOTE(review): the start of the mask argument passed to MaskedValueIsZero
// is missing from the listing (12933 is skipped).
12931 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
12932 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
12934 OpBits-PromBits))) ||
12935 (N->getOpcode() == ISD::SIGN_EXTEND &&
12936 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
12937 (OpBits-(PromBits-1)))) {
12938 ReallyNeedsExt = true;
12939 break;
12940 }
12941 }
12942 }
12943
12944 // Replace all inputs, either with the truncation operand, or a
12945 // truncation or extension to the final output type.
12946 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12947 // Constant inputs need to be replaced with the to-be-promoted nodes that
12948 // use them because they might have users outside of the cluster of
12949 // promoted nodes.
12950 if (isa<ConstantSDNode>(Inputs[i]))
12951 continue;
12952
12953 SDValue InSrc = Inputs[i].getOperand(0);
12954 if (Inputs[i].getValueType() == N->getValueType(0))
12955 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
12956 else if (N->getOpcode() == ISD::SIGN_EXTEND)
12957 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12958 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
12959 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12960 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12961 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
12962 else
12963 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12964 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
12965 }
12966
// Worklist of operations still to be rebuilt in the wide type. It is consumed
// from the back; entries whose operands are not ready are re-queued at the
// front.
// NOTE(review): presumably HandleSDNode is used so the tracked values stay
// valid across the ReplaceAllUsesOfValueWith calls - confirm.
12967 std::list<HandleSDNode> PromOpHandles;
12968 for (auto &PromOp : PromOps)
12969 PromOpHandles.emplace_back(PromOp);
12970
12971 // Replace all operations (these are all the same, but have a different
12972 // (promoted) return type). DAG.getNode will validate that the types of
12973 // a binary operator match, so go through the list in reverse so that
12974 // we've likely promoted both operands first.
12975 while (!PromOpHandles.empty()) {
12976 SDValue PromOp = PromOpHandles.back().getValue();
12977 PromOpHandles.pop_back();
12978
// C is the index of the first value operand: 0 for the binary operations,
// 1 for SELECT and 2 for SELECT_CC. Operands C and C+1 are the promoted ones.
12979 unsigned C;
12980 switch (PromOp.getOpcode()) {
12981 default: C = 0; break;
12982 case ISD::SELECT: C = 1; break;
12983 case ISD::SELECT_CC: C = 2; break;
12984 }
12985
12986 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12987 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
12988 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12989 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
12990 // The to-be-promoted operands of this node have not yet been
12991 // promoted (this should be rare because we're going through the
12992 // list backward, but if one of the operands has several users in
12993 // this cluster of to-be-promoted nodes, it is possible).
12994 PromOpHandles.emplace_front(PromOp);
12995 continue;
12996 }
12997
12998 // For SELECT and SELECT_CC nodes, we do a similar check for any
12999 // to-be-promoted comparison inputs.
13000 if (PromOp.getOpcode() == ISD::SELECT ||
13001 PromOp.getOpcode() == ISD::SELECT_CC) {
13002 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13003 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13004 (SelectTruncOp[1].count(PromOp.getNode()) &&
13005 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13006 PromOpHandles.emplace_front(PromOp);
13007 continue;
13008 }
13009 }
13010
13011 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13012 PromOp.getNode()->op_end());
13013
13014 // If this node has constant inputs, then they'll need to be promoted here.
13015 for (unsigned i = 0; i < 2; ++i) {
13016 if (!isa<ConstantSDNode>(Ops[C+i]))
13017 continue;
13018 if (Ops[C+i].getValueType() == N->getValueType(0))
13019 continue;
13020
13021 if (N->getOpcode() == ISD::SIGN_EXTEND)
13022 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13023 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13024 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13025 else
13026 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13027 }
13028
13029 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13030 // truncate them again to the original value type.
13031 if (PromOp.getOpcode() == ISD::SELECT ||
13032 PromOp.getOpcode() == ISD::SELECT_CC) {
13033 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13034 if (SI0 != SelectTruncOp[0].end())
13035 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13036 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13037 if (SI1 != SelectTruncOp[1].end())
13038 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13039 }
13040
// NOTE(review): the line that opens this statement is missing from the
// listing (13041 is skipped); only its last argument, the rebuilt node in the
// wide type, is visible.
13042 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13043 }
13044
13045 // Now we're left with the initial extension itself.
13046 if (!ReallyNeedsExt)
13047 return N->getOperand(0);
13048
13049 // To zero extend, just mask off everything except for the first bit (in the
13050 // i1 case).
// NOTE(review): the start of the mask-constant expression is missing from the
// listing (13053 is skipped).
13051 if (N->getOpcode() == ISD::ZERO_EXTEND)
13052 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13054 N->getValueSizeInBits(0), PromBits),
13055 dl, N->getValueType(0)));
13056
13057 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13058 "Invalid extension type");
// Sign extend in register: shift the low PromBits to the top, then shift
// back arithmetically by the same amount.
// NOTE(review): the declaration line of ShiftCst is missing from the listing
// (13060 is skipped); only its initializer is visible.
13059 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13061 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13062 return DAG.getNode(
13063 ISD::SRA, dl, N->getValueType(0),
13064 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13065 ShiftCst);
13066}
13067
13068SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13069 DAGCombinerInfo &DCI) const {
13070 assert(N->getOpcode() == ISD::SETCC &&
13071 "Should be called with a SETCC node");
13072
13073 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13074 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13075 SDValue LHS = N->getOperand(0);
13076 SDValue RHS = N->getOperand(1);
13077
13078 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13079 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13080 LHS.hasOneUse())
13081 std::swap(LHS, RHS);
13082
13083 // x == 0-y --> x+y == 0
13084 // x != 0-y --> x+y != 0
13085 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13086 RHS.hasOneUse()) {
13087 SDLoc DL(N);
13088 SelectionDAG &DAG = DCI.DAG;
13089 EVT VT = N->getValueType(0);
13090 EVT OpVT = LHS.getValueType();
13091 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13092 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13093 }
13094 }
13095
13096 return DAGCombineTruncBoolExt(N, DCI);
13097}
13098
13099// Is this an extending load from an f32 to an f64?
13100static bool isFPExtLoad(SDValue Op) {
13101 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13102 return LD->getExtensionType() == ISD::EXTLOAD &&
13103 Op.getValueType() == MVT::f64;
13104 return false;
13105}
13106
13107/// Reduces the number of fp-to-int conversion when building a vector.
13108///
13109/// If this vector is built out of floating to integer conversions,
13110/// transform it to a vector built out of floating point values followed by a
13111/// single floating to integer conversion of the vector.
13112/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
13113/// becomes (fptosi (build_vector ($A, $B, ...)))
///
/// Expects N to be a BUILD_VECTOR whose first operand is a PPCISD::MFVSR
/// (both are asserted). Returns an empty SDValue when any operand does not
/// fit the pattern or when the vector is a splat of a single conversion.
13114SDValue PPCTargetLowering::
13115combineElementTruncationToVectorTruncation(SDNode *N,
13116 DAGCombinerInfo &DCI) const {
13117 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13118 "Should be called with a BUILD_VECTOR node");
13119
13120 SelectionDAG &DAG = DCI.DAG;
13121 SDLoc dl(N);
13122
13123 SDValue FirstInput = N->getOperand(0);
13124 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13125 "The input operand must be an fp-to-int conversion.");
13126
13127 // This combine happens after legalization so the fp_to_[su]i nodes are
13128 // already converted to PPCISD nodes.
13129 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
// NOTE(review): several lines are missing from this listing around here
// (embedded numbers 13130-13133, 13135-13136 and 13138 are skipped). They
// include the opening of the block closed by the lone `}` near the end of the
// function and the declarations of `Is32Bit` and `Ops` used below.
13134 bool IsSplat = true;
13137 EVT SrcVT = FirstInput.getOperand(0).getValueType();
13139 EVT TargetVT = N->getValueType(0);
// Every operand must be an MFVSR; IsSplat stays true only if all operands
// are the very same value as the first one.
13140 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13141 SDValue NextOp = N->getOperand(i);
13142 if (NextOp.getOpcode() != PPCISD::MFVSR)
13143 return SDValue();
13144 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
// NOTE(review): the condition guarding the following return is missing from
// the listing (13145 is skipped); presumably it compares NextConversion with
// FirstConversion - confirm.
13146 return SDValue();
13147 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13148 // This is not valid if the input was originally double precision. It is
13149 // also not profitable to do unless this is an extending load in which
13150 // case doing this combine will allow us to combine consecutive loads.
13151 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13152 return SDValue();
13153 if (N->getOperand(i) != FirstInput)
13154 IsSplat = false;
13155 }
13156
13157 // If this is a splat, we leave it as-is since there will be only a single
13158 // fp-to-int conversion followed by a splat of the integer. This is better
13159 // for 32-bit and smaller ints and neutral for 64-bit ints.
13160 if (IsSplat)
13161 return SDValue();
13162
13163 // Now that we know we have the right type of node, get its operands
13164 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
// `In` is the conversion node under the MFVSR; its operand 0 is the
// floating-point source value.
13165 SDValue In = N->getOperand(i).getOperand(0);
13166 if (Is32Bit) {
13167 // For 32-bit values, we need to add an FP_ROUND node (if we made it
13168 // here, we know that all inputs are extending loads so this is safe).
13169 if (In.isUndef())
13170 Ops.push_back(DAG.getUNDEF(SrcVT));
13171 else {
13172 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13173 MVT::f32, In.getOperand(0),
13174 DAG.getIntPtrConstant(1, dl));
13175 Ops.push_back(Trunc);
13176 }
13177 } else
13178 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13179 }
13180
// Pick the generic vector conversion opcode (signed or unsigned).
// NOTE(review): the condition selecting FP_TO_SINT (13182-13183) and the
// declaration of `NewVT` (13188) are missing from the listing.
13181 unsigned Opcode;
13184 Opcode = ISD::FP_TO_SINT;
13185 else
13186 Opcode = ISD::FP_TO_UINT;
13187
13189 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13190 return DAG.getNode(Opcode, dl, TargetVT, BV);
13191 }
13192 return SDValue();
13193}
13194
13195/// Reduce the number of loads when building a vector.
13196///
13197/// Building a vector out of multiple loads can be converted to a load
13198/// of the vector type if the loads are consecutive. If the loads are
13199/// consecutive but in descending order, a shuffle is added at the end
13200/// to reorder the vector.
///
/// The inputs may be plain loads or fp_round(extload) nodes, but not a mix.
/// Returns an empty SDValue when the pattern does not match.
// NOTE(review): the function signature line is missing from this listing
// (13201 is skipped); the body uses `N` and `DAG`. Many other statements
// below are missing as well and are flagged where they occur.
13202 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13203 "Should be called with a BUILD_VECTOR node");
13204
13205 SDLoc dl(N);
13206
13207 // Return early for non byte-sized type, as they can't be consecutive.
13208 if (!N->getValueType(0).getVectorElementType().isByteSized())
13209 return SDValue();
13210
13211 bool InputsAreConsecutiveLoads = true;
13212 bool InputsAreReverseConsecutive = true;
13213 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13214 SDValue FirstInput = N->getOperand(0);
13215 bool IsRoundOfExtLoad = false;
13216
// The dyn_cast result is dereferenced without a null check; the preceding
// opcode test (== ISD::LOAD) is what this relies on.
13217 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13218 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13219 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13220 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13221 }
13222 // Not a build vector of (possibly fp_rounded) loads.
13223 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13224 N->getNumOperands() == 1)
13225 return SDValue();
13226
// Compare each operand with its predecessor to classify the sequence as
// ascending-consecutive, descending-consecutive, or neither.
13227 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13228 // If any inputs are fp_round(extload), they all must be.
13229 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13230 return SDValue();
13231
13232 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13233 N->getOperand(i);
13234 if (NextInput.getOpcode() != ISD::LOAD)
13235 return SDValue();
13236
// NOTE(review): the declaration this initializer belongs to (13237) and the
// declarations of `LD1`/`LD2` (13239-13240) are missing from the listing.
13238 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13241
13242 // If any inputs are fp_round(extload), they all must be.
13243 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13244 return SDValue();
13245
// NOTE(review): the statements controlled by these two checks (13247, 13249)
// are missing; presumably they clear the corresponding flag - confirm.
13246 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13248 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13250
13251 // Exit early if the loads are neither consecutive nor reverse consecutive.
// NOTE(review): the condition for this early return (13252) is missing.
13253 return SDValue();
13254 }
13255
// NOTE(review): the opening of this assert (13256) is missing; only its
// message remains.
13257 "The loads cannot be both consecutive and reverse consecutive.");
13258
// NOTE(review): the declarations that these two initializers belong to
// (13259, 13261) and the statements at 13265-13267, 13273 and 13278 are
// missing. The visible remainder builds either a single vector load from LD1
// or a vector load from LDL followed by an element-reversing shuffle.
13260 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13262 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13263 N->getOperand(N->getNumOperands()-1);
13264
13268 assert(LD1 && "Input needs to be a LoadSDNode.");
13269 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13270 LD1->getBasePtr(), LD1->getPointerInfo(),
13271 LD1->getAlignment());
13272 }
13274 assert(LDL && "Input needs to be a LoadSDNode.");
13275 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13276 LDL->getBasePtr(), LDL->getPointerInfo(),
13277 LDL->getAlignment());
// Shuffle mask NumOperands-1, ..., 1, 0: reverses the loaded elements.
13279 for (int i = N->getNumOperands() - 1; i >= 0; i--)
13280 Ops.push_back(i);
13281
13282 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13283 DAG.getUNDEF(N->getValueType(0)), Ops);
13284 }
13285 return SDValue();
13286}
13287
13288// This function adds the required vector_shuffle needed to get
13289// the elements of the vector extract in the correct position
13290// as specified by the CorrectElems encoding.
//
// Elems and CorrectElems pack one element index per byte: the low nibble of
// each byte is read on little-endian targets and the high nibble on
// big-endian targets. The shuffled input is bitcast to N's result type and
// wrapped in a SIGN_EXTEND_INREG node, which is returned.
// NOTE(review): the first line of the signature is missing from this listing
// (13291 is skipped); the body uses `N` and `DAG` in addition to the
// parameters visible below.
13292 SDValue Input, uint64_t Elems,
13293 uint64_t CorrectElems) {
13294 SDLoc dl(N);
13295
13296 unsigned NumElems = Input.getValueType().getVectorNumElements();
// All lanes start out as -1; only the lanes listed in CorrectElems are set.
13297 SmallVector<int, 16> ShuffleMask(NumElems, -1);
13298
13299 // Knowing the element indices being extracted from the original
13300 // vector and the order in which they're being inserted, just put
13301 // them at element indices required for the instruction.
13302 for (unsigned i = 0; i < N->getNumOperands(); i++) {
13303 if (DAG.getDataLayout().isLittleEndian())
13304 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13305 else
13306 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
// NOTE(review): a statement is missing here (13307 is skipped); presumably it
// advances CorrectElems by one byte, mirroring the Elems shift - confirm.
13308 Elems = Elems >> 8;
13309 }
13310
13311 SDValue Shuffle =
13312 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13313 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13314
13315 EVT VT = N->getValueType(0);
13316 SDValue Conv = DAG.getBitcast(VT, Shuffle);
13317
// NOTE(review): the construction of `ExtVT` is only partly visible (13318 and
// 13320 are skipped); it involves the input vector's element type.
13319 Input.getValueType().getVectorElementType(),
13321 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13322 DAG.getValueType(ExtVT));
13323}
13324
13325// Look for build vector patterns where input operands come from sign
13326// extended vector_extract elements of specific indices. If the correct indices
13327// aren't used, add a vector shuffle to fix up the indices and create
13328// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
13329// during instruction selection.
//
// Returns an empty SDValue when the operands do not all match, when the
// element-size combination is not one of the five supported ones, or when the
// indices are already the expected ones.
// NOTE(review): the function signature line is missing from this listing
// (13330 is skipped); the body uses `N` and `DAG`.
13331 // This array encodes the indices that the vector sign extend instructions
13332 // extract from when extending from one type to another for both BE and LE.
13333 // The right nibble of each byte corresponds to the LE indices.
13334 // and the left nibble of each byte corresponds to the BE indices.
13335 // For example: 0x3074B8FC byte->word
13336 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13337 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13338 // For example: 0x000070F8 byte->double word
13339 // For LE: the allowed indices are: 0x0,0x8
13340 // For BE: the allowed indices are: 0x7,0xF
13341 uint64_t TargetElems[] = {
13342 0x3074B8FC, // b->w
13343 0x000070F8, // b->d
13344 0x10325476, // h->w
13345 0x00003074, // h->d
13346 0x00001032, // w->d
13347 };
13348
// Elems accumulates the extracted indices, one byte per operand, in the same
// nibble encoding as TargetElems. Input is the common source vector.
13349 uint64_t Elems = 0;
13350 int Index;
13351 SDValue Input;
13352
// Checks that Op is a sign extension of a constant-index vector extract from
// the shared Input vector and, if so, appends that index to Elems.
13353 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13354 if (!Op)
13355 return false;
13356 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13357 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13358 return false;
13359
13360 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13361 // of the right width.
13362 SDValue Extract = Op.getOperand(0);
13363 if (Extract.getOpcode() == ISD::ANY_EXTEND)
13364 Extract = Extract.getOperand(0);
13365 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13366 return false;
13367
// NOTE(review): the declaration of `ExtOp` is missing from the listing (13368
// is skipped); from its use below it yields the extract index via
// getZExtValue() and is null when unavailable.
13369 if (!ExtOp)
13370 return false;
13371
13372 Index = ExtOp->getZExtValue();
// All extracts must read from one and the same vector.
13373 if (Input && Input != Extract.getOperand(0))
13374 return false;
13375
13376 if (!Input)
13377 Input = Extract.getOperand(0);
13378
// Append the index as a new low byte: low nibble on LE, high nibble on BE.
13379 Elems = Elems << 8;
13380 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13381 Elems |= Index;
13382
13383 return true;
13384 };
13385
13386 // If the build vector operands aren't sign extended vector extracts,
13387 // of the same input vector, then return.
13388 for (unsigned i = 0; i < N->getNumOperands(); i++) {
13389 if (!isSExtOfVecExtract(N->getOperand(i))) {
13390 return SDValue();
13391 }
13392 }
13393
13394 // If the vector extract indices are not correct, add the appropriate
13395 // vector_shuffle.
// The sum of source and destination element widths identifies the pairing:
// 8+32=40, 8+64=72, 16+32=48, 16+64=80, 32+64=96, in the same order as the
// entries of TargetElems.
13396 int TgtElemArrayIdx;
13397 int InputSize = Input.getValueType().getScalarSizeInBits();
13398 int OutputSize = N->getValueType(0).getScalarSizeInBits();
13399 if (InputSize + OutputSize == 40)
13400 TgtElemArrayIdx = 0;
13401 else if (InputSize + OutputSize == 72)
13402 TgtElemArrayIdx = 1;
13403 else if (InputSize + OutputSize == 48)
13404 TgtElemArrayIdx = 2;
13405 else if (InputSize + OutputSize == 80)
13406 TgtElemArrayIdx = 3;
13407 else if (InputSize + OutputSize == 96)
13408 TgtElemArrayIdx = 4;
13409 else
13410 return SDValue();
13411
// NOTE(review): the declaration of `CorrectElems` and the condition of this
// conditional expression are missing (13412-13413 are skipped). Presumably
// CorrectElems is TargetElems[TgtElemArrayIdx] and the test is the target's
// endianness - confirm. The visible arms keep only the low nibbles or only
// the high nibbles of every byte.
13414 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
13415 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
13416 if (Elems != CorrectElems) {
13417 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
13418 }
13419
13420 // Regular lowering will catch cases where a shuffle is not needed.
13421 return SDValue();
13422}
13423
13424// Look for the pattern of a load from a narrow width to i128, feeding
13425// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
13426// (LXVRZX). This node represents a zero extending load that will be matched
13427// to the Load VSX Vector Rightmost instructions.
//
// Returns an empty SDValue unless N has type v1i128, its operand 0 is a load
// with memory type i8/i16/i32/i64, and that load's extension kind is ZEXTLOAD
// or EXTLOAD.
// NOTE(review): the function signature line is missing from this listing
// (13428 is skipped); the body uses `N` and `DAG`.
13429 SDLoc DL(N);
13430
13431 // This combine is only eligible for a BUILD_VECTOR of v1i128.
13432 if (N->getValueType(0) != MVT::v1i128)
13433 return SDValue();
13434
13435 SDValue Operand = N->getOperand(0);
13436 // Proceed with the transformation if the operand to the BUILD_VECTOR
13437 // is a load instruction.
13438 if (Operand.getOpcode() != ISD::LOAD)
13439 return SDValue();
13440
// The dyn_cast result is dereferenced without a null check; the ISD::LOAD
// opcode test just above is what this relies on.
13441 LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
13442 EVT MemoryType = LD->getMemoryVT();
13443
13444 // This transformation is only valid if the we are loading either a byte,
13445 // halfword, word, or doubleword.
13446 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
13447 MemoryType == MVT::i32 || MemoryType == MVT::i64;
13448
13449 // Ensure that the load from the narrow width is being zero extended to i128.
// Note that any-extending loads (EXTLOAD) are accepted here as well.
13450 if (!ValidLDType ||
13451 (LD->getExtensionType() != ISD::ZEXTLOAD &&
13452 LD->getExtensionType() != ISD::EXTLOAD))
13453 return SDValue();
13454
// Operands for the replacement node: chain, base pointer, and the memory
// width in bits as a pointer-sized constant.
13455 SDValue LoadOps[] = {
13456 LD->getChain(), LD->getBasePtr(),
13457 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
13458
// NOTE(review): the start of the return statement is missing from the listing
// (13459-13460 are skipped); only its trailing arguments are visible.
13461 LoadOps, MemoryType, LD->getMemOperand());
13462}
13463
13464SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
13465 DAGCombinerInfo &DCI) const {
13466 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13467 "Should be called with a BUILD_VECTOR node");
13468
13469 SelectionDAG &DAG = DCI.DAG;
13470 SDLoc dl(N);
13471
13472 if (!Subtarget.hasVSX())
13473 return SDValue();
13474
13475 // The target independent DAG combiner will leave a build_vector of
13476 // float-to-int conversions intact. We can generate MUCH better code for
13477 // a float-to-int conversion of a vector of floats.
13478 SDValue FirstInput = N->getOperand(0);
13479 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
13480 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
13481 if (Reduced)
13482 return Reduced;
13483 }
13484
13485 // If we're building a vector out of consecutive loads, just load that
13486 // vector type.
13488 if (Reduced)
13489 return Reduced;
13490
13491 // If we're building a vector out of extended elements from another vector
13492 // we have P9 vector integer extend instructions. The code assumes legal
13493 // input types (i.e. it can't handle things like v4i16) so do not run before
13494 // legalization.
13495 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
13497 if (Reduced)
13498 return Reduced;
13499 }
13500
13501 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
13502 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
13503 // is a load from <valid narrow width> to i128.
13504 if (Subtarget.isISA3_1()) {
13506 if (BVOfZLoad)
13507 return BVOfZLoad;
13508 }
13509
13510 if (N->getValueType(0) != MVT::v2f64)
13511 return SDValue();
13512
13513 // Looking for:
13514 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
13515 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
13516 FirstInput.getOpcode() != ISD::UINT_TO_FP)
13517 return SDValue();
13518 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
13519 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
13520 return SDValue();
13521 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
13522 return SDValue();
13523
13524 SDValue Ext1 = FirstInput.getOperand(0);
13525 SDValue Ext2 = N->getOperand(1).getOperand(0);
13526 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13527 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13528 return SDValue();
13529
13532 if (!Ext1Op || !Ext2Op)
13533 return SDValue();
13534 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
13535 Ext1.getOperand(0) != Ext2.getOperand(0))
13536 return SDValue();
13537
13538 int FirstElem = Ext1Op->getZExtValue();
13539 int SecondElem = Ext2Op->getZExtValue();
13540 int SubvecIdx;
13541 if (FirstElem == 0 && SecondElem == 1)
13542 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
13543 else if (FirstElem == 2 && SecondElem == 3)
13544 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
13545 else
13546 return SDValue();
13547
13548 SDValue SrcVec = Ext1.getOperand(0);
13549 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
13551 return DAG.getNode(NodeType, dl, MVT::v2f64,
13553}
13554
13555SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
13556 DAGCombinerInfo &DCI) const {
13557 assert((N->getOpcode() == ISD::SINT_TO_FP ||
13558 N->getOpcode() == ISD::UINT_TO_FP) &&
13559 "Need an int -> FP conversion node here");
13560
13561 if (useSoftFloat() || !Subtarget.has64BitSupport())
13562 return SDValue();
13563
13564 SelectionDAG &DAG = DCI.DAG;
13565 SDLoc dl(N);
13566 SDValue Op(N, 0);
13567
13568 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
13569 // from the hardware.
13570 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
13571 return SDValue();
13572 if (!Op.getOperand(0).getValueType().isSimple())
13573 return SDValue();
13574 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
13575 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
13576 return SDValue();
13577
13578 SDValue FirstOperand(Op.getOperand(0));
13579 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
13580 (FirstOperand.getValueType() == MVT::i8 ||
13581 FirstOperand.getValueType() == MVT::i16);
13582 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
13583 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
13584 bool DstDouble = Op.getValueType() == MVT::f64;
13585 unsigned ConvOp = Signed ?
13587 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
13589 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
13590 dl, false);
13592 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
13595 Ops, MVT::i8, LDN->getMemOperand());
13596
13597 // For signed conversion, we need to sign-extend the value in the VSR
13598 if (Signed) {
13599 SDValue ExtOps[] = { Ld, WidthConst };
13600 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
13601 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
13602 } else
13603 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
13604 }
13605
13606
13607 // For i32 intermediate values, unfortunately, the conversion functions
13608 // leave the upper 32 bits of the value are undefined. Within the set of
13609 // scalar instructions, we have no method for zero- or sign-extending the
13610 // value. Thus, we cannot handle i32 intermediate values here.
13611 if (Op.getOperand(0).getValueType() == MVT::i32)
13612 return SDValue();
13613
13614 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
13615 "UINT_TO_FP is supported only with FPCVT");
13616
13617 // If we have FCFIDS, then use it when converting to single-precision.
13618 // Otherwise, convert to double-precision and then round.
13619 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
13620 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
13622 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
13623 : PPCISD::FCFID);
13624 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
13625 ? MVT::f32
13626 : MVT::f64;
13627
13628 // If we're converting from a float, to an int, and back to a float again,
13629 // then we don't need the store/load pair at all.
13630 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
13631 Subtarget.hasFPCVT()) ||
13632 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
13633 SDValue Src = Op.getOperand(0).getOperand(0);
13634 if (Src.getValueType() == MVT::f32) {
13635 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
13636 DCI.AddToWorklist(Src.getNode());
13637 } else if (Src.getValueType() != MVT::f64) {
13638 // Make sure that we don't pick up a ppc_fp128 source value.
13639 return SDValue();
13640 }
13641
13642 unsigned FCTOp =
13643 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
13645
13646 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
13647 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
13648
13649 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
13650 FP = DAG.getNode(ISD::FP_ROUND, dl,
13651 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
13652 DCI.AddToWorklist(FP.getNode());
13653 }
13654
13655 return FP;
13656 }
13657
13658 return SDValue();
13659}
13660
13661// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
13662// builtins) into loads with swaps.
13664 DAGCombinerInfo &DCI) const {
13665 SelectionDAG &DAG = DCI.DAG;
13666 SDLoc dl(N);
13667 SDValue Chain;
13668 SDValue Base;
13669 MachineMemOperand *MMO;
13670
13671 switch (N->getOpcode()) {
13672 default:
13673 llvm_unreachable("Unexpected opcode for little endian VSX load");
13674 case ISD::LOAD: {
13676 Chain = LD->getChain();
13677 Base = LD->getBasePtr();
13678 MMO = LD->getMemOperand();
13679 // If the MMO suggests this isn't a load of a full vector, leave
13680 // things alone. For a built-in, we have to make the change for
13681 // correctness, so if there is a size problem that will be a bug.
13682 if (MMO->getSize() < 16)
13683 return SDValue();
13684 break;
13685 }
13688 Chain = Intrin->getChain();
13689 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
13690 // us what we want. Get operand 2 instead.
13691 Base = Intrin->getOperand(2);
13692 MMO = Intrin->getMemOperand();
13693 break;
13694 }
13695 }
13696
13697 MVT VecTy = N->getValueType(0).getSimpleVT();
13698
13699 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
13700 // aligned and the type is a vector with elements up to 4 bytes
13701 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13702 VecTy.getScalarSizeInBits() <= 32) {
13703 return SDValue();
13704 }
13705
13706 SDValue LoadOps[] = { Chain, Base };
13709 LoadOps, MVT::v2f64, MMO);
13710
13711 DCI.AddToWorklist(Load.getNode());
13712 Chain = Load.getValue(1);
13713 SDValue Swap = DAG.getNode(
13714 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
13715 DCI.AddToWorklist(Swap.getNode());
13716
13717 // Add a bitcast if the resulting load type doesn't match v2f64.
13718 if (VecTy != MVT::v2f64) {
13719 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
13720 DCI.AddToWorklist(N.getNode());
13721 // Package {bitcast value, swap's chain} to match Load's shape.
13722 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
13723 N, Swap.getValue(1));
13724 }
13725
13726 return Swap;
13727}
13728
13729// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
13730// builtins) into stores with swaps.
13732 DAGCombinerInfo &DCI) const {
13733 SelectionDAG &DAG = DCI.DAG;
13734 SDLoc dl(N);
13735 SDValue Chain;
13736 SDValue Base;
13737 unsigned SrcOpnd;
13738 MachineMemOperand *MMO;
13739
13740 switch (N->getOpcode()) {
13741 default:
13742 llvm_unreachable("Unexpected opcode for little endian VSX store");
13743 case ISD::STORE: {
13745 Chain = ST->getChain();
13746 Base = ST->getBasePtr();
13747 MMO = ST->getMemOperand();
13748 SrcOpnd = 1;
13749 // If the MMO suggests this isn't a store of a full vector, leave
13750 // things alone. For a built-in, we have to make the change for
13751 // correctness, so if there is a size problem that will be a bug.
13752 if (MMO->getSize() < 16)
13753 return SDValue();
13754 break;
13755 }
13756 case ISD::INTRINSIC_VOID: {
13758 Chain = Intrin->getChain();
13759 // Intrin->getBasePtr() oddly does not get what we want.
13760 Base = Intrin->getOperand(3);
13761 MMO = Intrin->getMemOperand();
13762 SrcOpnd = 2;
13763 break;
13764 }
13765 }
13766
13767 SDValue Src = N->getOperand(SrcOpnd);
13768 MVT VecTy = Src.getValueType().getSimpleVT();
13769
13770 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
13771 // aligned and the type is a vector with elements up to 4 bytes
13772 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13773 VecTy.getScalarSizeInBits() <= 32) {
13774 return SDValue();
13775 }
13776
13777 // All stores are done as v2f64 and possible bit cast.
13778 if (VecTy != MVT::v2f64) {
13779 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
13780 DCI.AddToWorklist(Src.getNode());
13781 }
13782
13783 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
13784 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
13785 DCI.AddToWorklist(Swap.getNode());
13786 Chain = Swap.getValue(1);
13787 SDValue StoreOps[] = { Chain, Swap, Base };
13789 DAG.getVTList(MVT::Other),
13790 StoreOps, VecTy, MMO);
13791 DCI.AddToWorklist(Store.getNode());
13792 return Store;
13793}
13794
13795// Handle DAG combine for STORE (FP_TO_INT F).
13796SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
13797 DAGCombinerInfo &DCI) const {
13798
13799 SelectionDAG &DAG = DCI.DAG;
13800 SDLoc dl(N);
13801 unsigned Opcode = N->getOperand(1).getOpcode();
13802
13803 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
13804 && "Not a FP_TO_INT Instruction!");
13805
13806 SDValue Val = N->getOperand(1).getOperand(0);
13807 EVT Op1VT = N->getOperand(1).getValueType();
13808 EVT ResVT = Val.getValueType();
13809
13810 if (!isTypeLegal(ResVT))
13811 return SDValue();
13812
13813 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
13815 (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
13816 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
13817
13818 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
13819 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
13820 return SDValue();
13821
13822 // Extend f32 values to f64
13823 if (ResVT.getScalarSizeInBits() == 32) {
13824 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
13825 DCI.AddToWorklist(Val.getNode());
13826 }
13827
13828 // Set signed or unsigned conversion opcode.
13829 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
13831 PPCISD::FP_TO_UINT_IN_VSR;
13832
13833 Val = DAG.getNode(ConvOpcode,
13834 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
13835 DCI.AddToWorklist(Val.getNode());
13836
13837 // Set number of bytes being converted.
13838 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
13839 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
13840 DAG.getIntPtrConstant(ByteSize, dl, false),
13841 DAG.getValueType(Op1VT) };
13842
13844 DAG.getVTList(MVT::Other), Ops,
13845 cast<StoreSDNode>(N)->getMemoryVT(),
13846 cast<StoreSDNode>(N)->getMemOperand());
13847
13848 DCI.AddToWorklist(Val.getNode());
13849 return Val;
13850}
13851
13852static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
13853 // Check that the source of the element keeps flipping
13854 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
13855 bool PrevElemFromFirstVec = Mask[0] < NumElts;
13856 for (int i = 1, e = Mask.size(); i < e; i++) {
13857 if (PrevElemFromFirstVec && Mask[i] < NumElts)
13858 return false;
13859 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
13860 return false;
13862 }
13863 return true;
13864}
13865
13866static bool isSplatBV(SDValue Op) {
13867 if (Op.getOpcode() != ISD::BUILD_VECTOR)
13868 return false;
13869 SDValue FirstOp;
13870
13871 // Find first non-undef input.
13872 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
13873 FirstOp = Op.getOperand(i);
13874 if (!FirstOp.isUndef())
13875 break;
13876 }
13877
13878 // All inputs are undef or the same as the first non-undef input.
13879 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
13880 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
13881 return false;
13882 return true;
13883}
13884
13886 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13887 return Op;
13888 if (Op.getOpcode() != ISD::BITCAST)
13889 return SDValue();
13890 Op = Op.getOperand(0);
13891 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13892 return Op;
13893 return SDValue();
13894}
13895
13897 int LHSMaxIdx, int RHSMinIdx,
13898 int RHSMaxIdx, int HalfVec) {
13899 for (int i = 0, e = ShuffV.size(); i < e; i++) {
13900 int Idx = ShuffV[i];
13901 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
13902 ShuffV[i] += HalfVec;
13903 }
13904}
13905
13906// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
13907// the original is:
13908// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
13909// In such a case, just change the shuffle mask to extract the element
13910// from the permuted index.
13912 SDLoc dl(OrigSToV);
13913 EVT VT = OrigSToV.getValueType();
13914 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
13915 "Expecting a SCALAR_TO_VECTOR here");
13916 SDValue Input = OrigSToV.getOperand(0);
13917
13918 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13919 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
13920 SDValue OrigVector = Input.getOperand(0);
13921
13922 // Can't handle non-const element indices or different vector types
13923 // for the input to the extract and the output of the scalar_to_vector.
13924 if (Idx && VT == OrigVector.getValueType()) {
13926 NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
13927 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
13928 }
13929 }
13930 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
13931 OrigSToV.getOperand(0));
13932}
13933
13934// On little endian subtargets, combine shuffles such as:
13935// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
13936// into:
13937// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
13938// because the latter can be matched to a single instruction merge.
13939// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
13940// to put the value into element zero. Adjust the shuffle mask so that the
13941// vector can remain in permuted form (to prevent a swap prior to a shuffle).
13942SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
13943 SelectionDAG &DAG) const {
13944 SDValue LHS = SVN->getOperand(0);
13945 SDValue RHS = SVN->getOperand(1);
13946 auto Mask = SVN->getMask();
13947 int NumElts = LHS.getValueType().getVectorNumElements();
13948 SDValue Res(SVN, 0);
13949 SDLoc dl(SVN);
13950
13951 // None of these combines are useful on big endian systems since the ISA
13952 // already has a big endian bias.
13953 if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
13954 return Res;
13955
13956 // If this is not a shuffle of a shuffle and the first element comes from
13957 // the second vector, canonicalize to the commuted form. This will make it
13958 // more likely to match one of the single instruction patterns.
13959 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
13960 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
13961 std::swap(LHS, RHS);
13962 Res = DAG.getCommutedVectorShuffle(*SVN);
13963 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
13964 }
13965
13966 // Adjust the shuffle mask if either input vector comes from a
13967 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
13968 // form (to prevent the need for a swap).
13969 SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
13972 if (SToVLHS || SToVRHS) {
13973 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
13974 : SToVRHS.getValueType().getVectorNumElements();
13975 int NumEltsOut = ShuffV.size();
13976
13977 // Initially assume that neither input is permuted. These will be adjusted
13978 // accordingly if either input is.
13979 int LHSMaxIdx = -1;
13980 int RHSMinIdx = -1;
13981 int RHSMaxIdx = -1;
13982 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
13983
13984 // Get the permuted scalar to vector nodes for the source(s) that come from
13985 // ISD::SCALAR_TO_VECTOR.
13986 if (SToVLHS) {
13987 // Set up the values for the shuffle vector fixup.
13990 if (SToVLHS.getValueType() != LHS.getValueType())
13991 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
13992 LHS = SToVLHS;
13993 }
13994 if (SToVRHS) {
13998 if (SToVRHS.getValueType() != RHS.getValueType())
13999 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14000 RHS = SToVRHS;
14001 }
14002
14003 // Fix up the shuffle mask to reflect where the desired element actually is.
14004 // The minimum and maximum indices that correspond to element zero for both
14005 // the LHS and RHS are computed and will control which shuffle mask entries
14006 // are to be changed. For example, if the RHS is permuted, any shuffle mask
14007 // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14008 // HalfVec to refer to the corresponding element in the permuted vector.
14010 HalfVec);
14011 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14012
14013 // We may have simplified away the shuffle. We won't be able to do anything
14014 // further with it here.
14015 if (!isa<ShuffleVectorSDNode>(Res))
14016 return Res;
14017 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14018 }
14019
14020 // The common case after we commuted the shuffle is that the RHS is a splat
14021 // and we have elements coming in from the splat at indices that are not
14022 // conducive to using a merge.
14023 // Example:
14024 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14025 if (!isSplatBV(RHS))
14026 return Res;
14027
14028 // We are looking for a mask such that all even elements are from
14029 // one vector and all odd elements from the other.
14030 if (!isAlternatingShuffMask(Mask, NumElts))
14031 return Res;
14032
14033 // Adjust the mask so we are pulling in the same index from the splat
14034 // as the index from the interesting vector in consecutive elements.
14035 // Example (even elements from first vector):
14036 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14037 if (Mask[0] < NumElts)
14038 for (int i = 1, e = Mask.size(); i < e; i += 2)
14039 ShuffV[i] = (ShuffV[i - 1] + NumElts);
14040 // Example (odd elements from first vector):
14041 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14042 else
14043 for (int i = 0, e = Mask.size(); i < e; i += 2)
14044 ShuffV[i] = (ShuffV[i + 1] + NumElts);
14045
14046 // If the RHS has undefs, we need to remove them since we may have created
14047 // a shuffle that adds those instead of the splat value.
14048 SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14049 RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14050
14051 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14052 return Res;
14053}
14054
14055SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14057 DAGCombinerInfo &DCI) const {
14059 "Not a reverse memop pattern!");
14060
14061 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14062 auto Mask = SVN->getMask();
14063 int i = 0;
14064 auto I = Mask.rbegin();
14065 auto E = Mask.rend();
14066
14067 for (; I != E; ++I) {
14068 if (*I != i)
14069 return false;
14070 i++;
14071 }
14072 return true;
14073 };
14074
14075 SelectionDAG &DAG = DCI.DAG;
14076 EVT VT = SVN->getValueType(0);
14077
14078 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14079 return SDValue();
14080
14081 // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
14082 // See comment in PPCVSXSwapRemoval.cpp.
14083 // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
14084 if (!Subtarget.hasP9Vector())
14085 return SDValue();
14086
14087 if(!IsElementReverse(SVN))
14088 return SDValue();
14089
14090 if (LSBase->getOpcode() == ISD::LOAD) {
14091 SDLoc dl(SVN);
14092 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14093 return DAG.getMemIntrinsicNode(
14095 LSBase->getMemoryVT(), LSBase->getMemOperand());
14096 }
14097
14098 if (LSBase->getOpcode() == ISD::STORE) {
14099 SDLoc dl(LSBase);
14100 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14101 LSBase->getBasePtr()};
14102 return DAG.getMemIntrinsicNode(
14104 LSBase->getMemoryVT(), LSBase->getMemOperand());
14105 }
14106
14107 llvm_unreachable("Expected a load or store node here");
14108}
14109
14111 DAGCombinerInfo &DCI) const {
14112 SelectionDAG &DAG = DCI.DAG;
14113 SDLoc dl(N);
14114 switch (N->getOpcode()) {
14115 default: break;
14116 case ISD::ADD:
14117 return combineADD(N, DCI);
14118 case ISD::SHL:
14119 return combineSHL(N, DCI);
14120 case ISD::SRA:
14121 return combineSRA(N, DCI);
14122 case ISD::SRL:
14123 return combineSRL(N, DCI);
14124 case ISD::MUL:
14125 return combineMUL(N, DCI);
14126 case ISD::FMA:
14127 case PPCISD::FNMSUB:
14128 return combineFMALike(N, DCI);
14129 case PPCISD::SHL:
14130 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14131 return N->getOperand(0);
14132 break;
14133 case PPCISD::SRL:
14134 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14135 return N->getOperand(0);
14136 break;
14137 case PPCISD::SRA:
14138 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14139 if (C->isNullValue() || // 0 >>s V -> 0.
14140 C->isAllOnesValue()) // -1 >>s V -> -1.
14141 return N->getOperand(0);
14142 }
14143 break;
14144 case ISD::SIGN_EXTEND:
14145 case ISD::ZERO_EXTEND:
14146 case ISD::ANY_EXTEND:
14147 return DAGCombineExtBoolTrunc(N, DCI);
14148 case ISD::TRUNCATE:
14149 return combineTRUNCATE(N, DCI);
14150 case ISD::SETCC:
14151 if (SDValue CSCC = combineSetCC(N, DCI))
14152 return CSCC;
14154 case ISD::SELECT_CC:
14155 return DAGCombineTruncBoolExt(N, DCI);
14156 case ISD::SINT_TO_FP:
14157 case ISD::UINT_TO_FP:
14158 return combineFPToIntToFP(N, DCI);
14160 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14161 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14162 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14163 }
14164 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14165 case ISD::STORE: {
14166
14167 EVT Op1VT = N->getOperand(1).getValueType();
14168 unsigned Opcode = N->getOperand(1).getOpcode();
14169
14170 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14171 SDValue Val= combineStoreFPToInt(N, DCI);
14172 if (Val)
14173 return Val;
14174 }
14175
14176 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14178 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14179 if (Val)
14180 return Val;
14181 }
14182
14183 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14184 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14185 N->getOperand(1).getNode()->hasOneUse() &&
14186 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14187 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14188
14189 // STBRX can only handle simple types and it makes no sense to store fewer
14190 // than two bytes in byte-reversed order.
14191 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14192 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14193 break;
14194
14195 SDValue BSwapOp = N->getOperand(1).getOperand(0);
14196 // Do an any-extend to 32-bits if this is a half-word input.
14197 if (BSwapOp.getValueType() == MVT::i16)
14199
14200 // If the type of the BSWAP operand is wider than the stored memory width,
14201 // it needs to be shifted to the right before STBRX.
14202 if (Op1VT.bitsGT(mVT)) {
14203 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14204 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14205 DAG.getConstant(Shift, dl, MVT::i32));
14206 // Need to truncate if this is a bswap of i64 stored as i32/i16.
14207 if (Op1VT == MVT::i64)
14209 }
14210
14211 SDValue Ops[] = {
14212 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14213 };
14214 return
14216 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14217 cast<StoreSDNode>(N)->getMemOperand());
14218 }
14219
14220 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
14221 // So it can increase the chance of CSE constant construction.
14222 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14223 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14224 // Need to sign-extend to 64 bits to handle negative values.
14225 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14226 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14227 MemVT.getSizeInBits());
14229
14230 // DAG.getTruncStore() can't be used here because it doesn't accept
14231 // the general (base + offset) addressing mode.
14232 // So we use UpdateNodeOperands and setTruncatingStore instead.
14233 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14234 N->getOperand(3));
14235 cast<StoreSDNode>(N)->setTruncatingStore(true);
14236 return SDValue(N, 0);
14237 }
14238
14239 // For little endian, VSX stores require generating xxswapd/lxvd2x.
14240 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14241 if (Op1VT.isSimple()) {
14242 MVT StoreVT = Op1VT.getSimpleVT();
14243 if (Subtarget.needsSwapsForVSXMemOps() &&
14246 return expandVSXStoreForLE(N, DCI);
14247 }
14248 break;
14249 }
14250 case ISD::LOAD: {
14252 EVT VT = LD->getValueType(0);
14253
14254 // For little endian, VSX loads require generating lxvd2x/xxswapd.
14255 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14256 if (VT.isSimple()) {
14257 MVT LoadVT = VT.getSimpleVT();
14258 if (Subtarget.needsSwapsForVSXMemOps() &&
14259 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14261 return expandVSXLoadForLE(N, DCI);
14262 }
14263
14264 // We sometimes end up with a 64-bit integer load, from which we extract
14265 // two single-precision floating-point numbers. This happens with
14266 // std::complex<float>, and other similar structures, because of the way we
14267 // canonicalize structure copies. However, if we lack direct moves,
14268 // then the final bitcasts from the extracted integer values to the
14269 // floating-point numbers turn into store/load pairs. Even with direct moves,
14270 // just loading the two floating-point numbers is likely better.
14271 auto ReplaceTwoFloatLoad = [&]() {
14272 if (VT != MVT::i64)
14273 return false;
14274
14275 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14276 LD->isVolatile())
14277 return false;
14278
14279 // We're looking for a sequence like this:
14280 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14281 // t16: i64 = srl t13, Constant:i32<32>
14282 // t17: i32 = truncate t16
14283 // t18: f32 = bitcast t17
14284 // t19: i32 = truncate t13
14285 // t20: f32 = bitcast t19
14286
14287 if (!LD->hasNUsesOfValue(2, 0))
14288 return false;
14289
14290 auto UI = LD->use_begin();
14291 while (UI.getUse().getResNo() != 0) ++UI;
14292 SDNode *Trunc = *UI++;
14293 while (UI.getUse().getResNo() != 0) ++UI;
14294 SDNode *RightShift = *UI;
14295 if (Trunc->getOpcode() != ISD::TRUNCATE)
14296 std::swap(Trunc, RightShift);
14297
14298 if (Trunc->getOpcode() != ISD::TRUNCATE ||
14299 Trunc->getValueType(0) != MVT::i32 ||
14300 !Trunc->hasOneUse())
14301 return false;
14302 if (RightShift->getOpcode() != ISD::SRL ||
14303 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14304 RightShift->getConstantOperandVal(1) != 32 ||
14305 !RightShift->hasOneUse())
14306 return false;
14307
14308 SDNode *Trunc2 = *RightShift->use_begin();
14309 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14310 Trunc2->getValueType(0) != MVT::i32 ||
14311 !Trunc2->hasOneUse())
14312 return false;
14313
14314 SDNode *Bitcast = *Trunc->use_begin();
14315 SDNode *Bitcast2 = *Trunc2->use_begin();
14316
14317 if (Bitcast->getOpcode() != ISD::BITCAST ||
14318 Bitcast->getValueType(0) != MVT::f32)
14319 return false;
14320 if (Bitcast2->getOpcode() != ISD::BITCAST ||
14321 Bitcast2->getValueType(0) != MVT::f32)
14322 return false;
14323
14324 if (Subtarget.isLittleEndian())
14325 std::swap(Bitcast, Bitcast2);
14326
14327 // Bitcast has the second float (in memory-layout order) and Bitcast2
14328 // has the first one.
14329
14330 SDValue BasePtr = LD->getBasePtr();
14331 if (LD->isIndexed()) {
14332 assert(LD->getAddressingMode() == ISD::PRE_INC &&
14333 "Non-pre-inc AM on PPC?");
14334 BasePtr =
14335 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14336 LD->getOffset());
14337 }
14338
14339 auto MMOFlags =
14340 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14341 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14342 LD->getPointerInfo(), LD->getAlignment(),
14343 MMOFlags, LD->getAAInfo());
14344 SDValue AddPtr =
14345 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14346 BasePtr, DAG.getIntPtrConstant(4, dl));
14348 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14349 LD->getPointerInfo().getWithOffset(4),
14350 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14351
14352 if (LD->isIndexed()) {
14353 // Note that DAGCombine should re-form any pre-increment load(s) from
14354 // what is produced here if that makes sense.
14355 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14356 }
14357
14358 DCI.CombineTo(Bitcast2, FloatLoad);
14359 DCI.CombineTo(Bitcast, FloatLoad2);
14360
14361 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14362 SDValue(FloatLoad2.getNode(), 1));
14363 return true;
14364 };
14365
14366 if (ReplaceTwoFloatLoad())
14367 return SDValue(N, 0);
14368
14369 EVT MemVT = LD->getMemoryVT();
14370 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14372 if (LD->isUnindexed() && VT.isVector() &&
14373 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14374 // P8 and later hardware should just use LOAD.
14375 !Subtarget.hasP8Vector() &&
14376 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14377 VT == MVT::v4f32))) &&
14378 LD->getAlign() < ABIAlignment) {
14379 // This is a type-legal unaligned Altivec load.
14380 SDValue Chain = LD->getChain();
14381 SDValue Ptr = LD->getBasePtr();
14382 bool isLittleEndian = Subtarget.isLittleEndian();
14383
14384 // This implements the loading of unaligned vectors as described in
14385 // the venerable Apple Velocity Engine overview. Specifically:
14386 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14387 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14388 //
14389 // The general idea is to expand a sequence of one or more unaligned
14390 // loads into an alignment-based permutation-control instruction (lvsl
14391 // or lvsr), a series of regular vector loads (which always truncate
14392 // their input address to an aligned address), and a series of
14393 // permutations. The results of these permutations are the requested
14394 // loaded values. The trick is that the last "extra" load is not taken
14395 // from the address you might suspect (sizeof(vector) bytes after the
14396 // last requested load), but rather sizeof(vector) - 1 bytes after the
14397 // last requested vector. The point of this is to avoid a page fault if
14398 // the base address happened to be aligned. This works because if the
14399 // base address is aligned, then adding less than a full vector length
14400 // will cause the last vector in the sequence to be (re)loaded.
14401 // Otherwise, the next vector will be fetched as you might suspect was
14402 // necessary.
14403
14404 // We might be able to reuse the permutation generation from
14405 // a different base address offset from this one by an aligned amount.
14406 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14407 // optimization later.
14410 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14411 : Intrinsic::ppc_altivec_lvsl;
14412 IntrLD = Intrinsic::ppc_altivec_lvx;
14413 IntrPerm = Intrinsic::ppc_altivec_vperm;
14416 LDTy = MVT::v4i32;
14417
14419
14420 // Create the new MMO for the new base load. It is like the original MMO,
14421 // but represents an area in memory almost twice the vector size centered
14422 // on the original address. If the address is unaligned, we might start
14423 // reading up to (sizeof(vector)-1) bytes below the address of the
14424 // original unaligned load.
14427 MF.getMachineMemOperand(LD->getMemOperand(),
14428 -(long)MemVT.getStoreSize()+1,
14429 2*MemVT.getStoreSize()-1);
14430
14431 // Create the new base load.
14434 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
14439
14440 // Note that the value of IncOffset (which is provided to the next
14441 // load's pointer info offset value, and thus used to calculate the
14442 // alignment), and the value of IncValue (which is actually used to
14443 // increment the pointer value) are different! This is because we
14444 // require the next load to appear to be aligned, even though it
14445 // is actually offset from the base pointer by a lesser amount.
14446 int IncOffset = VT.getSizeInBits() / 8;
14447 int IncValue = IncOffset;
14448
14449 // Walk (both up and down) the chain looking for another load at the real
14450 // (aligned) offset (the alignment of the other load does not matter in
14451 // this case). If found, then do not use the offset reduction trick, as
14452 // that will prevent the loads from being later combined (as they would
14453 // otherwise be duplicates).
14454 if (!findConsecutiveLoad(LD, DAG))
14455 --IncValue;
14456
14457 SDValue Increment =
14459 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
14460
14462 MF.getMachineMemOperand(LD->getMemOperand(),
14463 1, 2*MemVT.getStoreSize()-1);
14464 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
14469
14471 BaseLoad.getValue(1), ExtraLoad.getValue(1));
14472
14473 // Because vperm has a big-endian bias, we must reverse the order
14474 // of the input vectors and complement the permute control vector
14475 // when generating little endian code. We have already handled the
14476 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
14477 // and ExtraLoad here.
14478 SDValue Perm;
14479 if (isLittleEndian)
14481 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
14482 else
14484 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
14485
14486 if (VT != PermTy)
14487 Perm = Subtarget.hasAltivec()
14488 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
14489 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
14490 DAG.getTargetConstant(1, dl, MVT::i64));
14491 // second argument is 1 because this rounding
14492 // is always exact.
14493
14494 // The output of the permutation is our loaded result, the TokenFactor is
14495 // our new chain.
14496 DCI.CombineTo(N, Perm, TF);
14497 return SDValue(N, 0);
14498 }
14499 }
14500 break;
14502 bool isLittleEndian = Subtarget.isLittleEndian();
14503 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
14504 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14505 : Intrinsic::ppc_altivec_lvsl);
14506 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
14507 SDValue Add = N->getOperand(1);
14508
14509 int Bits = 4 /* 16 byte alignment */;
14510
14511 if (DAG.MaskedValueIsZero(Add->getOperand(1),
14512 APInt::getAllOnesValue(Bits /* alignment */)
14513 .zext(Add.getScalarValueSizeInBits()))) {
14514 SDNode *BasePtr = Add->getOperand(0).getNode();
14515 for (SDNode::use_iterator UI = BasePtr->use_begin(),
14516 UE = BasePtr->use_end();
14517 UI != UE; ++UI) {
14518 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14519 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
14520 IID) {
14521 // We've found another LVSL/LVSR, and this address is an aligned
14522 // multiple of that one. The results will be the same, so use the
14523 // one we've just found instead.
14524
14525 return SDValue(*UI, 0);
14526 }
14527 }
14528 }
14529
14530 if (isa<ConstantSDNode>(Add->getOperand(1))) {
14531 SDNode *BasePtr = Add->getOperand(0).getNode();
14532 for (SDNode::use_iterator UI = BasePtr->use_begin(),
14533 UE = BasePtr->use_end(); UI != UE; ++UI) {
14534 if (UI->getOpcode() == ISD::ADD &&
14535 isa<ConstantSDNode>(UI->getOperand(1)) &&
14536 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
14537 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
14538 (1ULL << Bits) == 0) {
14539 SDNode *OtherAdd = *UI;
14540 for (SDNode::use_iterator VI = OtherAdd->use_begin(),
14541 VE = OtherAdd->use_end(); VI != VE; ++VI) {
14542 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14543 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
14544 return SDValue(*VI, 0);
14545 }
14546 }
14547 }
14548 }
14549 }
14550 }
14551
14552 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
14553 // Expose the vabsduw/h/b opportunity for down stream
14554 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
14555 (IID == Intrinsic::ppc_altivec_vmaxsw ||
14556 IID == Intrinsic::ppc_altivec_vmaxsh ||
14557 IID == Intrinsic::ppc_altivec_vmaxsb)) {
14558 SDValue V1 = N->getOperand(1);
14559 SDValue V2 = N->getOperand(2);
14560 if ((V1.getSimpleValueType() == MVT::v4i32 ||
14561 V1.getSimpleValueType() == MVT::v8i16 ||
14562 V1.getSimpleValueType() == MVT::v16i8) &&
14563 V1.getSimpleValueType() == V2.getSimpleValueType()) {
14564 // (0-a, a)
14565 if (V1.getOpcode() == ISD::SUB &&
14566 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
14567 V1.getOperand(1) == V2) {
14568 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
14569 }
14570 // (a, 0-a)
14571 if (V2.getOpcode() == ISD::SUB &&
14572 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
14573 V2.getOperand(1) == V1) {
14574 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14575 }
14576 // (x-y, y-x)
14577 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
14578 V1.getOperand(0) == V2.getOperand(1) &&
14579 V1.getOperand(1) == V2.getOperand(0)) {
14580 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14581 }
14582 }
14583 }
14584 }
14585
14586 break;
14588 // For little endian, VSX loads require generating lxvd2x/xxswapd.
14589 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14590 if (Subtarget.needsSwapsForVSXMemOps()) {
14591 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14592 default:
14593 break;
14594 case Intrinsic::ppc_vsx_lxvw4x:
14595 case Intrinsic::ppc_vsx_lxvd2x:
14596 return expandVSXLoadForLE(N, DCI);
14597 }
14598 }
14599 break;
14601 // For little endian, VSX stores require generating xxswapd/stxvd2x.
14602 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14603 if (Subtarget.needsSwapsForVSXMemOps()) {
14604 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14605 default:
14606 break;
14607 case Intrinsic::ppc_vsx_stxvw4x:
14608 case Intrinsic::ppc_vsx_stxvd2x:
14609 return expandVSXStoreForLE(N, DCI);
14610 }
14611 }
14612 break;
14613 case ISD::BSWAP:
14614 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
14615 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
14616 N->getOperand(0).hasOneUse() &&
14617 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
14618 (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
14619 N->getValueType(0) == MVT::i64))) {
14620 SDValue Load = N->getOperand(0);
14621 LoadSDNode *LD = cast<LoadSDNode>(Load);
14622 // Create the byte-swapping load.
14623 SDValue Ops[] = {
14624 LD->getChain(), // Chain
14625 LD->getBasePtr(), // Ptr
14626 DAG.getValueType(N->getValueType(0)) // VT
14627 };
14628 SDValue BSLoad =
14630 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
14632 Ops, LD->getMemoryVT(), LD->getMemOperand());
14633
14634 // If this is an i16 load, insert the truncate.
14636 if (N->getValueType(0) == MVT::i16)
14638
14639 // First, combine the bswap away. This makes the value produced by the
14640 // load dead.
14641 DCI.CombineTo(N, ResVal);
14642
14643 // Next, combine the load away, we give it a bogus result value but a real
14644 // chain result. The result value is dead because the bswap is dead.
14645 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
14646
14647 // Return N so it doesn't get rechecked!
14648 return SDValue(N, 0);
14649 }
14650 break;
14651 case PPCISD::VCMP:
14652 // If a VCMP_rec node already exists with exactly the same operands as this
14653 // node, use its result instead of this node (VCMP_rec computes both a CR6
14654 // and a normal output).
14655 //
14656 if (!N->getOperand(0).hasOneUse() &&
14657 !N->getOperand(1).hasOneUse() &&
14658 !N->getOperand(2).hasOneUse()) {
14659
14660 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
14661 SDNode *VCMPrecNode = nullptr;
14662
14663 SDNode *LHSN = N->getOperand(0).getNode();
14664 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
14665 UI != E; ++UI)
14666 if (UI->getOpcode() == PPCISD::VCMP_rec &&
14667 UI->getOperand(1) == N->getOperand(1) &&
14668 UI->getOperand(2) == N->getOperand(2) &&
14669 UI->getOperand(0) == N->getOperand(0)) {
14670 VCMPrecNode = *UI;
14671 break;
14672 }
14673
14674 // If there is no VCMP_rec node, or if its flag value has no uses,
14675 // don't transform this.
14676 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
14677 break;
14678
14679 // Look at the (necessarily single) use of the flag value. If it has a
14680 // chain, this transformation is more complex. Note that multiple things
14681 // could use the value result, which we should ignore.
14682 SDNode *FlagUser = nullptr;
14683 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
14684 FlagUser == nullptr; ++UI) {
14685 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
14686 SDNode *User = *UI;
14687 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
14688 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
14689 FlagUser = User;
14690 break;
14691 }
14692 }
14693 }
14694
14695 // If the user is a MFOCRF instruction, we know this is safe.
14696 // Otherwise we give up for right now.
14697 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
14698 return SDValue(VCMPrecNode, 0);
14699 }
14700 break;
14701 case ISD::BRCOND: {
14702 SDValue Cond = N->getOperand(1);
14703 SDValue Target = N->getOperand(2);
14704
14705 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14706 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
14707 Intrinsic::loop_decrement) {
14708
14709 // We now need to make the intrinsic dead (it cannot be instruction
14710 // selected).
14711 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
14712 assert(Cond.getNode()->hasOneUse() &&
14713 "Counter decrement has more than one use");
14714
14715 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
14716 N->getOperand(0), Target);
14717 }
14718 }
14719 break;
14720 case ISD::BR_CC: {
14721 // If this is a branch on an altivec predicate comparison, lower this so
14722 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
14723 // lowering is done pre-legalize, because the legalizer lowers the predicate
14724 // compare down to code that is difficult to reassemble.
14725 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
14726 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
14727
14728 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
14729 // value. If so, pass-through the AND to get to the intrinsic.
14730 if (LHS.getOpcode() == ISD::AND &&
14731 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14732 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
14733 Intrinsic::loop_decrement &&
14734 isa<ConstantSDNode>(LHS.getOperand(1)) &&
14735 !isNullConstant(LHS.getOperand(1)))
14736 LHS = LHS.getOperand(0);
14737
14738 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14739 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
14740 Intrinsic::loop_decrement &&
14741 isa<ConstantSDNode>(RHS)) {
14742 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
14743 "Counter decrement comparison is not EQ or NE");
14744
14745 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14746 bool isBDNZ = (CC == ISD::SETEQ && Val) ||
14747 (CC == ISD::SETNE && !Val);
14748
14749 // We now need to make the intrinsic dead (it cannot be instruction
14750 // selected).
14751 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
14752 assert(LHS.getNode()->hasOneUse() &&
14753 "Counter decrement has more than one use");
14754
14755 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
14756 N->getOperand(0), N->getOperand(4));
14757 }
14758
14759 int CompareOpc;
14760 bool isDot;
14761
14762 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14763 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
14764 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
14765 assert(isDot && "Can't compare against a vector result!");
14766
14767 // If this is a comparison against something other than 0/1, then we know
14768 // that the condition is never/always true.
14769 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14770 if (Val != 0 && Val != 1) {
14771 if (CC == ISD::SETEQ) // Cond never true, remove branch.
14772 return N->getOperand(0);
14773 // Always !=, turn it into an unconditional branch.
14774 return DAG.getNode(ISD::BR, dl, MVT::Other,
14775 N->getOperand(0), N->getOperand(4));
14776 }
14777
14778 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
14779
14780 // Create the PPCISD altivec 'dot' comparison node.
14781 SDValue Ops[] = {
14782 LHS.getOperand(2), // LHS of compare
14783 LHS.getOperand(3), // RHS of compare
14785 };
14786 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
14787 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
14788
14789 // Unpack the result based on how the target uses it.
14791 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
14792 default: // Can't happen, don't crash on invalid number though.
14793 case 0: // Branch on the value of the EQ bit of CR6.
14795 break;
14796 case 1: // Branch on the inverted value of the EQ bit of CR6.
14798 break;
14799 case 2: // Branch on the value of the LT bit of CR6.
14801 break;
14802 case 3: // Branch on the inverted value of the LT bit of CR6.
14804 break;
14805 }
14806
14807 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
14808 DAG.getConstant(CompOpc, dl, MVT::i32),
14809 DAG.getRegister(PPC::CR6, MVT::i32),
14810 N->getOperand(4), CompNode.getValue(1));
14811 }
14812 break;
14813 }
14814 case ISD::BUILD_VECTOR:
14815 return DAGCombineBuildVector(N, DCI);
14816 case ISD::ABS:
14817 return combineABS(N, DCI);
14818 case ISD::VSELECT:
14819 return combineVSelect(N, DCI);
14820 }
14821
14822 return SDValue();
14823}
14824
// Folds a signed division of N's operand 0 by a constant that is a power of
// two (or the negation of one) into a PPCISD::SRA_ADDZE node, followed by a
// negation (0 - result) when the divisor is the negated power of two.
// Returns an empty SDValue when the fold is not applied. Every node built
// here is appended to Created.
// NOTE(review): the lines carrying the function name and part of the
// parameter list are missing from this listing; N, Divisor and Created are
// presumably parameters -- confirm against the full source.
14825SDValue
14827 SelectionDAG &DAG,
14829 // fold (sdiv X, pow2)
14830 EVT VT = N->getValueType(0);
// An i64 division is only folded on a 64-bit subtarget.
14831 if (VT == MVT::i64 && !Subtarget.isPPC64())
14832 return SDValue();
// Only i32/i64 results and divisors of the form 2^k or -(2^k) are handled.
14833 if ((VT != MVT::i32 && VT != MVT::i64) ||
14834 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
14835 return SDValue();
14836
14837 SDLoc DL(N);
14838 SDValue N0 = N->getOperand(0);
14839
// Lg2 is the trailing-zero count of the positive power of two (Divisor, or
// -Divisor when the divisor is negative); it is passed as the shift amount.
14840 bool IsNegPow2 = (-Divisor).isPowerOf2();
14841 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
14842 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
14843
14844 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
14845 Created.push_back(Op.getNode());
14846
// For a negated power of two, negate the quotient by subtracting it from 0.
14847 if (IsNegPow2) {
14848 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
14849 Created.push_back(Op.getNode());
14850 }
14851
14852 return Op;
14853}
14854
14855//===----------------------------------------------------------------------===//
14856// Inline Assembly Support
14857//===----------------------------------------------------------------------===//
14858
// Records which bits of Op are known for PPC-specific nodes. Known is reset
// first; only the cases below then set known-zero bits. DemandedElts, DAG and
// Depth are not used by the code visible here.
// NOTE(review): the first line of this signature (function name and the Op
// parameter) is missing from this listing.
14860 KnownBits &Known,
14861 const APInt &DemandedElts,
14862 const SelectionDAG &DAG,
14863 unsigned Depth) const {
14864 Known.resetAll();
14865 switch (Op.getOpcode()) {
14866 default: break;
14867 case PPCISD::LBRX: {
14868 // lhbrx is known to have the top bits cleared out.
// Operand 2 is read as a VTSDNode; only the i16 variant sets known bits.
14869 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
14870 Known.Zero = 0xFFFF0000;
14871 break;
14872 }
// NOTE(review): the case label that opens the nested switch below is missing
// from this listing. Operand 0 is read as a constant and compared against
// intrinsic IDs, so this is presumably the intrinsic-node case -- confirm.
14874 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
14875 default: break;
// All of the AltiVec predicate-compare intrinsics listed here share one
// result: every bit except bit 0 is known to be zero.
14876 case Intrinsic::ppc_altivec_vcmpbfp_p:
14877 case Intrinsic::ppc_altivec_vcmpeqfp_p:
14878 case Intrinsic::ppc_altivec_vcmpequb_p:
14879 case Intrinsic::ppc_altivec_vcmpequh_p:
14880 case Intrinsic::ppc_altivec_vcmpequw_p:
14881 case Intrinsic::ppc_altivec_vcmpequd_p:
14882 case Intrinsic::ppc_altivec_vcmpequq_p:
14883 case Intrinsic::ppc_altivec_vcmpgefp_p:
14884 case Intrinsic::ppc_altivec_vcmpgtfp_p:
14885 case Intrinsic::ppc_altivec_vcmpgtsb_p:
14886 case Intrinsic::ppc_altivec_vcmpgtsh_p:
14887 case Intrinsic::ppc_altivec_vcmpgtsw_p:
14888 case Intrinsic::ppc_altivec_vcmpgtsd_p:
14889 case Intrinsic::ppc_altivec_vcmpgtsq_p:
14890 case Intrinsic::ppc_altivec_vcmpgtub_p:
14891 case Intrinsic::ppc_altivec_vcmpgtuh_p:
14892 case Intrinsic::ppc_altivec_vcmpgtuw_p:
14893 case Intrinsic::ppc_altivec_vcmpgtud_p:
14894 case Intrinsic::ppc_altivec_vcmpgtuq_p:
14895 Known.Zero = ~1U; // All bits but the low one are known to be zero.
14896 break;
14897 }
14898 }
14899 }
14900}
14901
// Chooses a preferred alignment for the loop ML based on the subtarget's CPU
// directive. For the directives listed below it returns Align(32) either for
// an innermost nested loop or for a loop whose total instruction size is more
// than 16 and at most 32 bytes.
// NOTE(review): the signature line, one line before the nested-loop check,
// and the final return statement are missing from this listing, so the
// fall-back result is not visible here.
14903 switch (Subtarget.getCPUDirective()) {
14904 default: break;
14905 case PPC::DIR_970:
14906 case PPC::DIR_PWR4:
14907 case PPC::DIR_PWR5:
14908 case PPC::DIR_PWR5X:
14909 case PPC::DIR_PWR6:
14910 case PPC::DIR_PWR6X:
14911 case PPC::DIR_PWR7:
14912 case PPC::DIR_PWR8:
14913 case PPC::DIR_PWR9:
14914 case PPC::DIR_PWR10:
14915 case PPC::DIR_PWR_FUTURE: {
// Without a loop there is nothing to measure; leave the switch.
14916 if (!ML)
14917 break;
14918
14920 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
14921 // so that we can decrease cache misses and branch-prediction misses.
14922 // Actual alignment of the loop will depend on the hotness check and other
14923 // logic in alignBlocks.
14924 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
14925 return Align(32);
14926 }
14927
14928 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
14929
14930 // For small loops (between 5 and 8 instructions), align to a 32-byte
14931 // boundary so that the entire loop fits in one instruction-cache line.
// The size is accumulated in bytes over every instruction of every block.
// Note that the break below only leaves the inner (per-block) loop; the outer
// loop still visits the remaining blocks, which cannot bring the total back
// under the limit.
14932 uint64_t LoopSize = 0;
14933 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
14934 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
14935 LoopSize += TII->getInstSizeInBytes(*J);
14936 if (LoopSize > 32)
14937 break;
14938 }
14939
14940 if (LoopSize > 16 && LoopSize <= 32)
14941 return Align(32);
14942
14943 break;
14944 }
14945 }
14946
14948}
14949
14950/// getConstraintType - Given a constraint, return the type of
14951/// constraint it is for this target.
// Single letters b, r, f, d, v, y and the two-letter codes wc, wa, wd, wf,
// ws, wi, ww are reported as C_RegisterClass; Z is reported as C_Memory.
// Anything else is delegated to TargetLowering::getConstraintType.
// NOTE(review): the signature lines are missing from this listing.
14954 if (Constraint.size() == 1) {
14955 switch (Constraint[0]) {
14956 default: break;
14957 case 'b':
14958 case 'r':
14959 case 'f':
14960 case 'd':
14961 case 'v':
14962 case 'y':
14963 return C_RegisterClass;
14964 case 'Z':
14965 // FIXME: While Z does indicate a memory constraint, it specifically
14966 // indicates an r+r address (used in conjunction with the 'y' modifier
14967 // in the replacement string). Currently, we're forcing the base
14968 // register to be r0 in the asm printer (which is interpreted as zero)
14969 // and forming the complete address in the second register. This is
14970 // suboptimal.
14971 return C_Memory;
14972 }
14973 } else if (Constraint == "wc") { // individual CR bits.
14974 return C_RegisterClass;
14975 } else if (Constraint == "wa" || Constraint == "wd" ||
14976 Constraint == "wf" || Constraint == "ws" ||
14977 Constraint == "wi" || Constraint == "ww") {
14978 return C_RegisterClass; // VSX registers.
14979 }
// Unrecognized single letters and all other strings use the generic handling.
14980 return TargetLowering::getConstraintType(Constraint);
14981}
14982
14983/// Examine constraint type and operand type and determine a weight value.
14984/// This object must already have been set up with the operand type
14985/// and the current alternative constraint selected.
// Returns CW_Default when the operand has no value. The two-letter "w"
// constraints return CW_Register when the operand's IR type matches; the
// single-letter constraints are then handled by the switch on the first
// character.
// NOTE(review): the start of the signature, the declaration/initialization of
// `weight`, and the statement in the switch's default case are missing from
// this listing, so the value returned for unmatched constraints is not
// visible here.
14988 AsmOperandInfo &info, const char *constraint) const {
14990 Value *CallOperandVal = info.CallOperandVal;
14991 // If we don't have a value, we can't do a match,
14992 // but allow it at the lowest weight.
14993 if (!CallOperandVal)
14994 return CW_Default;
14995 Type *type = CallOperandVal->getType();
14996
14997 // Look at the constraint type.
14998 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
14999 return CW_Register; // an individual CR bit.
15000 else if ((StringRef(constraint) == "wa" ||
15001 StringRef(constraint) == "wd" ||
15002 StringRef(constraint) == "wf") &&
15003 type->isVectorTy())
15004 return CW_Register;
15005 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15006 return CW_Register; // just hold 64-bit integers data.
15007 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15008 return CW_Register;
15009 else if (StringRef(constraint) == "ww" && type->isFloatTy())
15010 return CW_Register;
15011
// Only the first character is examined from here on. A "w" constraint whose
// type did not match above also reaches this switch and lands in the default
// case, since 'w' has no case of its own.
15012 switch (*constraint) {
15013 default:
15015 break;
15016 case 'b':
15017 if (type->isIntegerTy())
15018 weight = CW_Register;
15019 break;
15020 case 'f':
15021 if (type->isFloatTy())
15022 weight = CW_Register;
15023 break;
15024 case 'd':
15025 if (type->isDoubleTy())
15026 weight = CW_Register;
15027 break;
15028 case 'v':
15029 if (type->isVectorTy())
15030 weight = CW_Register;
15031 break;
// 'y' and 'Z' assign their weight without looking at the operand type.
15032 case 'y':
15033 weight = CW_Register;
15034 break;
15035 case 'Z':
15036 weight = CW_Memory;
15037 break;
15038 }
15039 return weight;
15040}
15041
// Maps an inline-asm constraint string and value type to a
// (physical register, register class) pair. A first element of 0 means "any
// register of the class". Handles the single-letter GCC RS6000 constraints,
// the two-letter "w" VSX/CR-bit constraints, explicitly named VSX registers,
// the 32-bit to 64-bit GPR upgrade on PPC64, and the "{cc}" alias for CR0.
// NOTE(review): the line carrying the function name and its first parameter,
// and the line that initializes R, are missing from this listing. R is
// presumably the result of the base-class lookup mentioned in the FIXME
// below, and TRI is presumably that first parameter -- confirm.
15042std::pair<unsigned, const TargetRegisterClass *>
15044 StringRef Constraint,
15045 MVT VT) const {
15046 if (Constraint.size() == 1) {
15047 // GCC RS6000 Constraint Letters
15048 switch (Constraint[0]) {
15049 case 'b': // R1-R31
15050 if (VT == MVT::i64 && Subtarget.isPPC64())
15051 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15052 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15053 case 'r': // R0-R31
15054 if (VT == MVT::i64 && Subtarget.isPPC64())
15055 return std::make_pair(0U, &PPC::G8RCRegClass);
15056 return std::make_pair(0U, &PPC::GPRCRegClass);
15057 // 'd' and 'f' constraints are both defined to be "the floating point
15058 // registers", where one is for 32-bit and the other for 64-bit. We don't
15059 // really care overly much here so just give them all the same reg classes.
// With SPE the 32-bit types use GPRC and the 64-bit types use SPERC. Any
// other VT falls out of the switch to the generic handling further down.
15060 case 'd':
15061 case 'f':
15062 if (Subtarget.hasSPE()) {
15063 if (VT == MVT::f32 || VT == MVT::i32)
15064 return std::make_pair(0U, &PPC::GPRCRegClass);
15065 if (VT == MVT::f64 || VT == MVT::i64)
15066 return std::make_pair(0U, &PPC::SPERCRegClass);
15067 } else {
15068 if (VT == MVT::f32 || VT == MVT::i32)
15069 return std::make_pair(0U, &PPC::F4RCRegClass);
15070 if (VT == MVT::f64 || VT == MVT::i64)
15071 return std::make_pair(0U, &PPC::F8RCRegClass);
15072 }
15073 break;
15074 case 'v':
15075 if (Subtarget.hasAltivec())
15076 return std::make_pair(0U, &PPC::VRRCRegClass);
15077 break;
15078 case 'y': // crrc
15079 return std::make_pair(0U, &PPC::CRRCRegClass);
15080 }
15081 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15082 // An individual CR bit.
15083 return std::make_pair(0U, &PPC::CRBITRCRegClass);
15084 } else if ((Constraint == "wa" || Constraint == "wd" ||
15085 Constraint == "wf" || Constraint == "wi") &&
15086 Subtarget.hasVSX()) {
15087 return std::make_pair(0U, &PPC::VSRCRegClass);
15088 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
// f32 uses VSSRC only when P8 vector support is present; otherwise VSFRC.
15089 if (VT == MVT::f32 && Subtarget.hasP8Vector())
15090 return std::make_pair(0U, &PPC::VSSRCRegClass);
15091 else
15092 return std::make_pair(0U, &PPC::VSFRCRegClass);
15093 }
15094
15095 // If we name a VSX register, we can't defer to the base class because it
15096 // will not recognize the correct register (their names will be VSL{0-31}
15097 // and V{0-31} so they won't match). So we match them here.
// Indices 0-31 map to VSL0 + N and indices 32-63 map to V0 + (N - 32).
// NOTE(review): only characters 1 and 2 are compared, so Constraint[0] is
// not checked here. The 0..63 range is enforced by the assert only, and atoi
// stops at the first non-digit character; it presumably relies on a trailing
// '}' in the constraint text -- confirm.
15098 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15099 int VSNum = atoi(Constraint.data() + 3);
15100 assert(VSNum >= 0 && VSNum <= 63 &&
15101 "Attempted to access a vsr out of range");
15102 if (VSNum < 32)
15103 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15104 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15105 }
15106 std::pair<unsigned, const TargetRegisterClass *> R =
15108
15109 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15110 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15111 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15112 // register.
15113 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15114 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15115 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15116 PPC::GPRCRegClass.contains(R.first))
15117 return std::make_pair(TRI->getMatchingSuperReg(R.first,
15118 PPC::sub_32, &PPC::G8RCRegClass),
15119 &PPC::G8RCRegClass);
15120
15121 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
// The alias is applied only when no register class was found above; the
// comparison against "{cc}" is case-insensitive.
15122 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15123 R.first = PPC::CR0;
15124 R.second = &PPC::CRRCRegClass;
15125 }
15126
15127 return R;
15128}
15129
15130/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15131/// vector. If it is invalid, don't add anything to Ops.
// Constraints longer than one character are ignored. The letters I through P
// require a constant operand, which is emitted as an i64 target constant when
// it satisfies the letter's range rule. When nothing is produced here, the
// request is forwarded to TargetLowering::LowerAsmOperandForConstraint.
// NOTE(review): the first signature line (function name and the Op
// parameter), the declaration of CST, and the conditions guarding the 'J'
// and 'L' cases are missing from this listing.
15133 std::string &Constraint,
15134 std::vector<SDValue>&Ops,
15135 SelectionDAG &DAG) const {
15136 SDValue Result;
15137
15138 // Only support length 1 constraints.
15139 if (Constraint.length() > 1) return;
15140
15141 char Letter = Constraint[0];
15142 switch (Letter) {
15143 default: break;
15144 case 'I':
15145 case 'J':
15146 case 'K':
15147 case 'L':
15148 case 'M':
15149 case 'N':
15150 case 'O':
15151 case 'P': {
15153 if (!CST) return; // Must be an immediate to match.
15154 SDLoc dl(Op);
// The constant is read sign-extended to 64 bits.
15155 int64_t Value = CST->getSExtValue();
15156 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15157 // numbers are printed as such.
15158 switch (Letter) {
15159 default: llvm_unreachable("Unknown constraint letter!");
15160 case 'I': // "I" is a signed 16-bit constant.
15161 if (isInt<16>(Value))
15162 Result = DAG.getTargetConstant(Value, dl, TCVT);
15163 break;
15164 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
15166 Result = DAG.getTargetConstant(Value, dl, TCVT);
15167 break;
15168 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
15170 Result = DAG.getTargetConstant(Value, dl, TCVT);
15171 break;
15172 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
15173 if (isUInt<16>(Value))
15174 Result = DAG.getTargetConstant(Value, dl, TCVT);
15175 break;
15176 case 'M': // "M" is a constant that is greater than 31.
15177 if (Value > 31)
15178 Result = DAG.getTargetConstant(Value, dl, TCVT);
15179 break;
15180 case 'N': // "N" is a positive constant that is an exact power of two.
15181 if (Value > 0 && isPowerOf2_64(Value))
15182 Result = DAG.getTargetConstant(Value, dl, TCVT);
15183 break;
15184 case 'O': // "O" is the constant zero.
15185 if (Value == 0)
15186 Result = DAG.getTargetConstant(Value, dl, TCVT);
15187 break;
15188 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
// NOTE(review): -Value overflows when Value is the minimum int64_t.
15189 if (isInt<16>(-Value))
15190 Result = DAG.getTargetConstant(Value, dl, TCVT);
15191 break;
15192 }
15193 break;
15194 }
15195 }
15196
// A non-null Result means one of the immediate constraints matched.
15197 if (Result.getNode()) {
15198 Ops.push_back(Result);
15199 return;
15200 }
15201
15202 // Handle standard constraint letters.
15203 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15204}
15205
15206// isLegalAddressingMode - Return true if the addressing mode represented
15207// by AM is legal for this target, for a load/store of the specified type.
// The decision uses AM.BaseOffs, AM.BaseGV, AM.Scale and AM.HasBaseReg plus
// whether Ty is a vector type; AS and I are not consulted in the code below.
// NOTE(review): the first line of the signature (return type, function name
// and first parameter) is missing from this listing.
15209 const AddrMode &AM, Type *Ty,
15210 unsigned AS,
15211 Instruction *I) const {
15212 // Vector type r+i form is supported since power9 as DQ form. We don't check
15213 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
15214 // imm form is preferred and the offset can be adjusted to use imm form later
15215 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
15216 // max offset to check legal addressing mode, we should be a little aggressive
15217 // to contain other offsets for that LSRUse.
15218 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15219 return false;
15220
15221 // PPC allows a sign-extended 16-bit immediate field.
// NOTE(review): the bounds below are built from 1LL << 16, so offsets in
// [-65535, 65534] pass this test, which is wider than the signed 16-bit range
// [-32768, 32767] named in the comment above. Confirm whether 1LL << 15 was
// intended before changing it, since callers may depend on the current range.
15222 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15223 return false;
15224
15225 // No global is ever allowed as a base.
15226 if (AM.BaseGV)
15227 return false;
15228
15229 // PPC only supports r+r and r+i forms; check the register scale.
15230 switch (AM.Scale) {
15231 case 0: // "r+i" or just "i", depending on HasBaseReg.
15232 break;
15233 case 1:
15234 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
15235 return false;
15236 // Otherwise we have r+r or r+i.
15237 break;
15238 case 2:
15239 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
15240 return false;
15241 // Allow 2*r as r+r.
15242 break;
15243 default:
15244 // No other scales are supported.
15245 return false;
15246 }
15247
15248 return true;
15249}
15250
// Lowers a return-address query. Operand 0 of Op is read as the constant
// frame depth. The function marks the return address as taken and requests
// that the LR store be kept. For Depth > 0 it computes the frame address via
// LowerFRAMEADDR and the subtarget's return-save offset; for Depth == 0 it
// loads through the return-address frame index.
// NOTE(review): several lines are missing from this listing: the definition
// of MF, the condition guarding the early `return SDValue()`, and the
// trailing arguments of both getLoad calls.
15251SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15252 SelectionDAG &DAG) const {
15254 MachineFrameInfo &MFI = MF.getFrameInfo();
15255 MFI.setReturnAddressIsTaken(true);
15256
15258 return SDValue();
15259
15260 SDLoc dl(Op);
15261 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15262
15263 // Make sure the function does not optimize away the store of the RA to
15264 // the stack.
15265 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15266 FuncInfo->setLRStoreRequired();
15267 bool isPPC64 = Subtarget.isPPC64();
15268 auto PtrVT = getPointerTy(MF.getDataLayout());
15269
// Non-zero depth: start from the frame address computed for the same Op and
// use the return-save offset, typed to match the pointer width.
15270 if (Depth > 0) {
15271 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15272 SDValue Offset =
15273 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15274 isPPC64 ? MVT::i64 : MVT::i32);
15275 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15278 }
15279
15280 // Just load the return address off the stack.
15281 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15282 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15284}
15285
15286SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15287 SelectionDAG &DAG) const {
15288 SDLoc dl(Op);
15289 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15290
15292 MachineFrameInfo &MFI = MF.getFrameInfo();
15293 MFI.setFrameAddressIsTaken(true);
15294
15296 bool isPPC64 = PtrVT == MVT::i64;
15297
15298 // Naked functions never have a frame pointer, and so we use r1. For all
15299 // other functions, this decision must be delayed until during PEI.
15300 unsigned FrameReg;
15301 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15302 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15303 else
15304 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15305
15306 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15307 PtrVT);
15308 while (Depth--)
15309 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15311 return FrameAddr;
15312}
15313
15314// FIXME? Maybe this could be a TableGen attribute on some registers and
15315// this table could be generated automatically from RegInfo.
15317 const MachineFunction &MF) const {
15318 bool isPPC64 = Subtarget.isPPC64();
15319
15320 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15321 if (!is64Bit && VT != LLT::scalar(32))
15322 report_fatal_error("Invalid register global variable type");
15323
15325 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15326 .Case("r2", isPPC64 ? Register() : PPC::R2)
15327 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15328 .Default(Register());
15329
15330 if (Reg)
15331 return Reg;
15332 report_fatal_error("Invalid register name global variable");
15333}
15334
15336 // 32-bit SVR4 ABI access everything as got-indirect.
15337 if (Subtarget.is32BitELFABI())
15338 return true;
15339
15340 // AIX accesses everything indirectly through the TOC, which is similar to
15341 // the GOT.
15342 if (Subtarget.isAIXABI())
15343 return true;
15344
15346 // If it is small or large code model, module locals are accessed
15347 // indirectly by loading their address from .toc/.got.
15349 return true;
15350
15351 // JumpTable and BlockAddress are accessed as got-indirect.
15353 return true;
15354
15356 return Subtarget.isGVIndirectSymbol(G->getGlobal());
15357
15358 return false;
15359}
15360
15361bool
15363 // The PowerPC target isn't yet aware of offsets.
15364 return false;
15365}
15366
15368 const CallInst &I,
15369 MachineFunction &MF,
15370 unsigned Intrinsic) const {
15371 switch (Intrinsic) {
15372 case Intrinsic::ppc_altivec_lvx:
15373 case Intrinsic::ppc_altivec_lvxl:
15374 case Intrinsic::ppc_altivec_lvebx:
15375 case Intrinsic::ppc_altivec_lvehx:
15376 case Intrinsic::ppc_altivec_lvewx:
15377 case Intrinsic::ppc_vsx_lxvd2x:
15378 case Intrinsic::ppc_vsx_lxvw4x:
15379 case Intrinsic::ppc_vsx_lxvd2x_be:
15380 case Intrinsic::ppc_vsx_lxvw4x_be:
15381 case Intrinsic::ppc_vsx_lxvl:
15382 case Intrinsic::ppc_vsx_lxvll: {
15383 EVT VT;
15384 switch (Intrinsic) {
15385 case Intrinsic::ppc_altivec_lvebx:
15386 VT = MVT::i8;
15387 break;
15388 case Intrinsic::ppc_altivec_lvehx:
15389 VT = MVT::i16;
15390 break;
15391 case Intrinsic::ppc_altivec_lvewx:
15392 VT = MVT::i32;
15393 break;
15394 case Intrinsic::ppc_vsx_lxvd2x:
15395 case Intrinsic::ppc_vsx_lxvd2x_be:
15396 VT = MVT::v2f64;
15397 break;
15398 default:
15399 VT = MVT::v4i32;
15400 break;
15401 }
15402
15403 Info.opc = ISD::INTRINSIC_W_CHAIN;
15404 Info.memVT = VT;
15405 Info.ptrVal = I.getArgOperand(0);
15406 Info.offset = -VT.getStoreSize()+1;
15407 Info.size = 2*VT.getStoreSize()-1;
15408 Info.align = Align(1);
15409 Info.flags = MachineMemOperand::MOLoad;
15410 return true;
15411 }
15412 case Intrinsic::ppc_altivec_stvx:
15413 case Intrinsic::ppc_altivec_stvxl:
15414 case Intrinsic::ppc_altivec_stvebx:
15415 case Intrinsic::ppc_altivec_stvehx:
15416 case Intrinsic::ppc_altivec_stvewx:
15417 case Intrinsic::ppc_vsx_stxvd2x:
15418 case Intrinsic::ppc_vsx_stxvw4x:
15419 case Intrinsic::ppc_vsx_stxvd2x_be:
15420 case Intrinsic::ppc_vsx_stxvw4x_be:
15421 case Intrinsic::ppc_vsx_stxvl:
15422 case Intrinsic::ppc_vsx_stxvll: {
15423 EVT VT;
15424 switch (Intrinsic) {
15425 case Intrinsic::ppc_altivec_stvebx:
15426 VT = MVT::i8;
15427 break;
15428 case Intrinsic::ppc_altivec_stvehx:
15429 VT = MVT::i16;
15430 break;
15431 case Intrinsic::ppc_altivec_stvewx:
15432 VT = MVT::i32;
15433 break;
15434 case Intrinsic::ppc_vsx_stxvd2x:
15435 case Intrinsic::ppc_vsx_stxvd2x_be:
15436 VT = MVT::v2f64;
15437 break;
15438 default:
15439 VT = MVT::v4i32;
15440 break;
15441 }
15442
15443 Info.opc = ISD::INTRINSIC_VOID;
15444 Info.memVT = VT;
15445 Info.ptrVal = I.getArgOperand(1);
15446 Info.offset = -VT.getStoreSize()+1;
15447 Info.size = 2*VT.getStoreSize()-1;
15448 Info.align = Align(1);
15449 Info.flags = MachineMemOperand::MOStore;
15450 return true;
15451 }
15452 default:
15453 break;
15454 }
15455
15456 return false;
15457}
15458
15459/// It returns EVT::Other if the type should be determined using generic
15460/// target-independent logic.
15462 const MemOp &Op, const AttributeList &FuncAttributes) const {
15463 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
15464 // We should use Altivec/VSX loads and stores when available. For unaligned
15465 // addresses, unaligned VSX loads are only fast starting with the P8.
15466 if (Subtarget.hasAltivec() && Op.size() >= 16 &&
15467 (Op.isAligned(Align(16)) ||
15468 ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
15469 return MVT::v4i32;
15470 }
15471
15472 if (Subtarget.isPPC64()) {
15473 return MVT::i64;
15474 }
15475
15476 return MVT::i32;
15477}
15478
15479/// Returns true if it is beneficial to convert a load of a constant
15480/// to just the constant itself.
15482 Type *Ty) const {
15483 assert(Ty->isIntegerTy());
15484
15485 unsigned BitSize = Ty->getPrimitiveSizeInBits();
15486 return !(BitSize == 0 || BitSize > 64);
15487}
15488
15490 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15491 return false;
15492 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15493 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15494 return NumBits1 == 64 && NumBits2 == 32;
15495}
15496
15498 if (!VT1.isInteger() || !VT2.isInteger())
15499 return false;
15500 unsigned NumBits1 = VT1.getSizeInBits();
15501 unsigned NumBits2 = VT2.getSizeInBits();
15502 return NumBits1 == 64 && NumBits2 == 32;
15503}
15504
15506 // Generally speaking, zexts are not free, but they are free when they can be
15507 // folded with other operations.
15508 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
15509 EVT MemVT = LD->getMemoryVT();
15510 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
15511 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
15512 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
15513 LD->getExtensionType() == ISD::ZEXTLOAD))
15514 return true;
15515 }
15516
15517 // FIXME: Add other cases...
15518 // - 32-bit shifts with a zext to i64
15519 // - zext after ctlz, bswap, etc.
15520 // - zext after and by a constant mask
15521
15522 return TargetLowering::isZExtFree(Val, VT2);
15523}
15524
15526 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
15527 "invalid fpext types");
15528 // Extending to float128 is not free.
15529 if (DestVT == MVT::f128)
15530 return false;
15531 return true;
15532}
15533
15535 return isInt<16>(Imm) || isUInt<16>(Imm);
15536}
15537
15539 return isInt<16>(Imm) || isUInt<16>(Imm);
15540}
15541
15543 unsigned,
15544 unsigned,
15546 bool *Fast) const {
15548 return false;
15549
15550 // PowerPC supports unaligned memory access for simple non-vector types.
15551 // Although accessing unaligned addresses is not as efficient as accessing
15552 // aligned addresses, it is generally more efficient than manual expansion,
15553 // and generally only traps for software emulation when crossing page
15554 // boundaries.
15555
15556 if (!VT.isSimple())
15557 return false;
15558
15559 if (VT.isFloatingPoint() && !VT.isVector() &&
15560 !Subtarget.allowsUnalignedFPAccess())
15561 return false;
15562
15563 if (VT.getSimpleVT().isVector()) {
15564 if (Subtarget.hasVSX()) {
15565 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
15566 VT != MVT::v4f32 && VT != MVT::v4i32)
15567 return false;
15568 } else {
15569 return false;
15570 }
15571 }
15572
15573 if (VT == MVT::ppcf128)
15574 return false;
15575
15576 if (Fast)
15577 *Fast = true;
15578
15579 return true;
15580}
15581
15583 SDValue C) const {
15584 // Check integral scalar types.
15585 if (!VT.isScalarInteger())
15586 return false;
15587 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
15588 if (!ConstNode->getAPIntValue().isSignedIntN(64))
15589 return false;
15590 // This transformation will generate >= 2 operations. But the following
15591 // cases will generate <= 2 instructions during ISEL. So exclude them.
15592 // 1. If the constant multiplier fits 16 bits, it can be handled by one
15593 // HW instruction, ie. MULLI
15594 // 2. If the multiplier after shifted fits 16 bits, an extra shift
15595 // instruction is needed than case 1, ie. MULLI and RLDICR
15596 int64_t Imm = ConstNode->getSExtValue();
15597 unsigned Shift = countTrailingZeros<uint64_t>(Imm);
15598 Imm >>= Shift;
15599 if (isInt<16>(Imm))
15600 return false;
15601 uint64_t UImm = static_cast<uint64_t>(Imm);
15602 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
15603 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
15604 return true;
15605 }
15606 return false;
15607}
15608
15614
15616 Type *Ty) const {
15617 switch (Ty->getScalarType()->getTypeID()) {
15618 case Type::FloatTyID:
15619 case Type::DoubleTyID:
15620 return true;
15621 case Type::FP128TyID:
15622 return Subtarget.hasP9Vector();
15623 default:
15624 return false;
15625 }
15626}
15627
15628// FIXME: add more patterns which are not profitable to hoist.
15630 if (!I->hasOneUse())
15631 return true;
15632
15634 assert(User && "A single use instruction with no uses.");
15635
15636 switch (I->getOpcode()) {
15637 case Instruction::FMul: {
15638 // Don't break FMA, PowerPC prefers FMA.
15639 if (User->getOpcode() != Instruction::FSub &&
15640 User->getOpcode() != Instruction::FAdd)
15641 return true;
15642
15643 const TargetOptions &Options = getTargetMachine().Options;
15644 const Function *F = I->getFunction();
15645 const DataLayout &DL = F->getParent()->getDataLayout();
15646 Type *Ty = User->getOperand(0)->getType();
15647
15648 return !(
15651 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
15652 }
15653 case Instruction::Load: {
15654 // Don't break "store (load float*)" pattern, this pattern will be combined
15655 // to "store (load int32)" in later InstCombine pass. See function
15656 // combineLoadToOperationType. On PowerPC, loading a float point takes more
15657 // cycles than loading a 32 bit integer.
15658 LoadInst *LI = cast<LoadInst>(I);
15659 // For the loads that combineLoadToOperationType does nothing, like
15660 // ordered load, it should be profitable to hoist them.
15661 // For swifterror load, it can only be used for pointer to pointer type, so
15662 // later type check should get rid of this case.
15663 if (!LI->isUnordered())
15664 return true;
15665
15666 if (User->getOpcode() != Instruction::Store)
15667 return true;
15668
15669 if (I->getType()->getTypeID() != Type::FloatTyID)
15670 return true;
15671
15672 return false;
15673 }
15674 default:
15675 return true;
15676 }
15677 return true;
15678}
15679
15680const MCPhysReg *
15682 // LR is a callee-save register, but we must treat it as clobbered by any call
15683 // site. Hence we include LR in the scratch registers, which are in turn added
15684 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
15685 // to CTR, which is used by any indirect call.
15686 static const MCPhysReg ScratchRegs[] = {
15687 PPC::X12, PPC::LR8, PPC::CTR8, 0
15688 };
15689
15690 return ScratchRegs;
15691}
15692
15694 const Constant *PersonalityFn) const {
15695 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
15696}
15697
15699 const Constant *PersonalityFn) const {
15700 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
15701}
15702
15703bool
15705 EVT VT , unsigned DefinedValues) const {
15706 if (VT == MVT::v2i64)
15707 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
15708
15709 if (Subtarget.hasVSX())
15710 return true;
15711
15713}
15714
15721
15722// Create a fast isel object.
15723FastISel *
15725 const TargetLibraryInfo *LibInfo) const {
15726 return PPC::createFastISel(FuncInfo, LibInfo);
15727}
15728
15729// 'Inverted' means the FMA opcode after negating one multiplicand.
15730// For example, (fma -a b c) = (fnmsub a b c)
15731static unsigned invertFMAOpcode(unsigned Opc) {
15732 switch (Opc) {
15733 default:
15734 llvm_unreachable("Invalid FMA opcode for PowerPC!");
15735 case ISD::FMA:
15736 return PPCISD::FNMSUB;
15737 case PPCISD::FNMSUB:
15738 return ISD::FMA;
15739 }
15740}
15741
15743 bool LegalOps, bool OptForSize,
15744 NegatibleCost &Cost,
15745 unsigned Depth) const {
15747 return SDValue();
15748
15749 unsigned Opc = Op.getOpcode();
15750 EVT VT = Op.getValueType();
15751 SDNodeFlags Flags = Op.getNode()->getFlags();
15752
15753 switch (Opc) {
15754 case PPCISD::FNMSUB:
15755 if (!Op.hasOneUse() || !isTypeLegal(VT))
15756 break;
15757
15758 const TargetOptions &Options = getTargetMachine().Options;
15759 SDValue N0 = Op.getOperand(0);
15760 SDValue N1 = Op.getOperand(1);
15761 SDValue N2 = Op.getOperand(2);
15762 SDLoc Loc(Op);
15763
15765 SDValue NegN2 =
15766 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
15767
15768 if (!NegN2)
15769 return SDValue();
15770
15771 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
15772 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
15773 // These transformations may change sign of zeroes. For example,
15774 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
15775 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
15776 // Try and choose the cheaper one to negate.
15778 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
15779 N0Cost, Depth + 1);
15780
15782 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
15783 N1Cost, Depth + 1);
15784
15785 if (NegN0 && N0Cost <= N1Cost) {
15786 Cost = std::min(N0Cost, N2Cost);
15787 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
15788 } else if (NegN1) {
15789 Cost = std::min(N1Cost, N2Cost);
15790 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
15791 }
15792 }
15793
15794 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
15795 if (isOperationLegal(ISD::FMA, VT)) {
15796 Cost = N2Cost;
15797 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
15798 }
15799
15800 break;
15801 }
15802
15803 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
15804 Cost, Depth);
15805}
15806
15807// Override to enable LOAD_STACK_GUARD lowering on Linux.
15809 if (!Subtarget.isTargetLinux())
15811 return true;
15812}
15813
15814// Override to disable global variable loading on Linux.
15816 if (!Subtarget.isTargetLinux())
15818}
15819
15821 bool ForCodeSize) const {
15822 if (!VT.isSimple() || !Subtarget.hasVSX())
15823 return false;
15824
15825 switch(VT.getSimpleVT().SimpleTy) {
15826 default:
15827 // For FP types that are currently not supported by PPC backend, return
15828 // false. Examples: f16, f80.
15829 return false;
15830 case MVT::f32:
15831 case MVT::f64:
15832 if (Subtarget.hasPrefixInstrs()) {
15833 // With prefixed instructions, we can materialize anything that can be
15834 // represented with a 32-bit immediate, not just positive zero.
15835 APFloat APFloatOfImm = Imm;
15837 }
15839 case MVT::ppcf128:
15840 return Imm.isPosZero();
15841 }
15842}
15843
15844// For vector shift operation op, fold
15845// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
15847 SelectionDAG &DAG) {
15848 SDValue N0 = N->getOperand(0);
15849 SDValue N1 = N->getOperand(1);
15850 EVT VT = N0.getValueType();
15851 unsigned OpSizeInBits = VT.getScalarSizeInBits();
15852 unsigned Opcode = N->getOpcode();
15853 unsigned TargetOpcode;
15854
15855 switch (Opcode) {
15856 default:
15857 llvm_unreachable("Unexpected shift operation");
15858 case ISD::SHL:
15859 TargetOpcode = PPCISD::SHL;
15860 break;
15861 case ISD::SRL:
15862 TargetOpcode = PPCISD::SRL;
15863 break;
15864 case ISD::SRA:
15865 TargetOpcode = PPCISD::SRA;
15866 break;
15867 }
15868
15869 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
15870 N1->getOpcode() == ISD::AND)
15871 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
15872 if (Mask->getZExtValue() == OpSizeInBits - 1)
15873 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
15874
15875 return SDValue();
15876}
15877
15878SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
15879 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15880 return Value;
15881
15882 SDValue N0 = N->getOperand(0);
15883 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
15884 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
15885 N0.getOpcode() != ISD::SIGN_EXTEND ||
15886 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
15887 N->getValueType(0) != MVT::i64)
15888 return SDValue();
15889
15890 // We can't save an operation here if the value is already extended, and
15891 // the existing shift is easier to combine.
15892 SDValue ExtsSrc = N0.getOperand(0);
15893 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
15894 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
15895 return SDValue();
15896
15897 SDLoc DL(N0);
15898 SDValue ShiftBy = SDValue(CN1, 0);
15899 // We want the shift amount to be i32 on the extswli, but the shift could
15900 // have an i64.
15901 if (ShiftBy.getValueType() == MVT::i64)
15902 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
15903
15904 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
15905 ShiftBy);
15906}
15907
15908SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
15909 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15910 return Value;
15911
15912 return SDValue();
15913}
15914
15915SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
15916 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15917 return Value;
15918
15919 return SDValue();
15920}
15921
15922// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
15923// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
15924// When C is zero, the equation (addi Z, -C) can be simplified to Z
15925// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
15927 const PPCSubtarget &Subtarget) {
15928 if (!Subtarget.isPPC64())
15929 return SDValue();
15930
15931 SDValue LHS = N->getOperand(0);
15932 SDValue RHS = N->getOperand(1);
15933
15934 auto isZextOfCompareWithConstant = [](SDValue Op) {
15935 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
15936 Op.getValueType() != MVT::i64)
15937 return false;
15938
15939 SDValue Cmp = Op.getOperand(0);
15940 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
15941 Cmp.getOperand(0).getValueType() != MVT::i64)
15942 return false;
15943
15944 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
15945 int64_t NegConstant = 0 - Constant->getSExtValue();
15946 // Due to the limitations of the addi instruction,
15947 // -C is required to be [-32768, 32767].
15948 return isInt<16>(NegConstant);
15949 }
15950
15951 return false;
15952 };
15953
15956
15957 // If there is a pattern, canonicalize a zext operand to the RHS.
15959 std::swap(LHS, RHS);
15960 else if (!LHSHasPattern && !RHSHasPattern)
15961 return SDValue();
15962
15963 SDLoc DL(N);
15965 SDValue Cmp = RHS.getOperand(0);
15966 SDValue Z = Cmp.getOperand(0);
15967 auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
15968
15969 assert(Constant && "Constant Should not be a null pointer.");
15970 int64_t NegConstant = 0 - Constant->getSExtValue();
15971
15972 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
15973 default: break;
15974 case ISD::SETNE: {
15975 // when C == 0
15976 // --> addze X, (addic Z, -1).carry
15977 // /
15978 // add X, (zext(setne Z, C))--
15979 // \ when -32768 <= -C <= 32767 && C != 0
15980 // --> addze X, (addic (addi Z, -C), -1).carry
15983 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
15985 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
15986 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
15987 SDValue(Addc.getNode(), 1));
15988 }
15989 case ISD::SETEQ: {
15990 // when C == 0
15991 // --> addze X, (subfic Z, 0).carry
15992 // /
15993 // add X, (zext(sete Z, C))--
15994 // \ when -32768 <= -C <= 32767 && C != 0
15995 // --> addze X, (subfic (addi Z, -C), 0).carry
15998 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16000 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16001 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16002 SDValue(Subc.getNode(), 1));
16003 }
16004 }
16005
16006 return SDValue();
16007}
16008
16009// Transform
16010// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16011// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16012// In this case both C1 and C2 must be known constants.
16013// C1+C2 must fit into a 34 bit signed integer.
16015 const PPCSubtarget &Subtarget) {
16016 if (!Subtarget.isUsingPCRelativeCalls())
16017 return SDValue();
16018
16019 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16020 // If we find that node try to cast the Global Address and the Constant.
16021 SDValue LHS = N->getOperand(0);
16022 SDValue RHS = N->getOperand(1);
16023
16024 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16025 std::swap(LHS, RHS);
16026
16027 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16028 return SDValue();
16029
16030 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16033
16034 // Check that both casts succeeded.
16035 if (!GSDN || !ConstNode)
16036 return SDValue();
16037
16038 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16039 SDLoc DL(GSDN);
16040
16041 // The signed int offset needs to fit in 34 bits.
16042 if (!isInt<34>(NewOffset))
16043 return SDValue();
16044
16045 // The new global address is a copy of the old global address except
16046 // that it has the updated Offset.
16047 SDValue GA =
16048 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16049 NewOffset, GSDN->getTargetFlags());
16051 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16052 return MatPCRel;
16053}
16054
16055SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16056 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16057 return Value;
16058
16059 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16060 return Value;
16061
16062 return SDValue();
16063}
16064
16065// Detect TRUNCATE operations on bitcasts of float128 values.
16066 // What we are looking for here is the situation where we extract a subset
16067// of bits from a 128 bit float.
16068// This can be of two forms:
16069// 1) BITCAST of f128 feeding TRUNCATE
16070// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16071// The reason this is required is because we do not have a legal i128 type
16072// and so we want to prevent having to store the f128 and then reload part
16073// of it.
16074SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16075 DAGCombinerInfo &DCI) const {
16076 // If we are using CRBits then try that first.
16077 if (Subtarget.useCRBits()) {
16078 // Check if CRBits did anything and return that if it did.
16079 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16080 return CRTruncValue;
16081 }
16082
16083 SDLoc dl(N);
16084 SDValue Op0 = N->getOperand(0);
16085
16086 // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16087 if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16088 EVT VT = N->getValueType(0);
16089 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16090 return SDValue();
16091 SDValue Sub = Op0.getOperand(0);
16092 if (Sub.getOpcode() == ISD::SUB) {
16093 SDValue SubOp0 = Sub.getOperand(0);
16094 SDValue SubOp1 = Sub.getOperand(1);
16095 if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16096 (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16097 return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16098 SubOp1.getOperand(0),
16099 DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16100 }
16101 }
16102 }
16103
16104 // Looking for a truncate of i128 to i64.
16105 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16106 return SDValue();
16107
16108 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16109
16110 // SRL feeding TRUNCATE.
16111 if (Op0.getOpcode() == ISD::SRL) {
16113 // The right shift has to be by 64 bits.
16114 if (!ConstNode || ConstNode->getZExtValue() != 64)
16115 return SDValue();
16116
16117 // Switch the element number to extract.
16118 EltToExtract = EltToExtract ? 0 : 1;
16119 // Update Op0 past the SRL.
16120 Op0 = Op0.getOperand(0);
16121 }
16122
16123 // BITCAST feeding a TRUNCATE possibly via SRL.
16124 if (Op0.getOpcode() == ISD::BITCAST &&
16125 Op0.getValueType() == MVT::i128 &&
16126 Op0.getOperand(0).getValueType() == MVT::f128) {
16127 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16128 return DCI.DAG.getNode(
16129 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16130 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16131 }
16132 return SDValue();
16133}
16134
16135SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16136 SelectionDAG &DAG = DCI.DAG;
16137
16139 if (!ConstOpOrElement)
16140 return SDValue();
16141
16142 // An imul is usually smaller than the alternative sequence for legal type.
16144 isOperationLegal(ISD::MUL, N->getValueType(0)))
16145 return SDValue();
16146
16147 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16148 switch (this->Subtarget.getCPUDirective()) {
16149 default:
16150 // TODO: enhance the condition for subtarget before pwr8
16151 return false;
16152 case PPC::DIR_PWR8:
16153 // type mul add shl
16154 // scalar 4 1 1
16155 // vector 7 2 2
16156 return true;
16157 case PPC::DIR_PWR9:
16158 case PPC::DIR_PWR10:
16160 // type mul add shl
16161 // scalar 5 2 2
16162 // vector 7 2 2
16163
16164 // The cycle RATIO of related operations are showed as a table above.
16165 // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
16166 // scalar and vector type. For 2 instrs patterns, add/sub + shl
16167 // are 4, it is always profitable; but for 3 instrs patterns
16168 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
16169 // So we should only do it for vector type.
16170 return IsAddOne && IsNeg ? VT.isVector() : true;
16171 }
16172 };
16173
16174 EVT VT = N->getValueType(0);
16175 SDLoc DL(N);
16176
16177 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16178 bool IsNeg = MulAmt.isNegative();
16179 APInt MulAmtAbs = MulAmt.abs();
16180
16181 if ((MulAmtAbs - 1).isPowerOf2()) {
16182 // (mul x, 2^N + 1) => (add (shl x, N), x)
16183 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16184
16185 if (!IsProfitable(IsNeg, true, VT))
16186 return SDValue();
16187
16188 SDValue Op0 = N->getOperand(0);
16189 SDValue Op1 =
16190 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16191 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16192 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16193
16194 if (!IsNeg)
16195 return Res;
16196
16197 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16198 } else if ((MulAmtAbs + 1).isPowerOf2()) {
16199 // (mul x, 2^N - 1) => (sub (shl x, N), x)
16200 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16201
16202 if (!IsProfitable(IsNeg, false, VT))
16203 return SDValue();
16204
16205 SDValue Op0 = N->getOperand(0);
16206 SDValue Op1 =
16207 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16208 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16209
16210 if (!IsNeg)
16211 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16212 else
16213 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16214
16215 } else {
16216 return SDValue();
16217 }
16218}
16219
16220// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16221// in combiner since we need to check SD flags and other subtarget features.
16222SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16223 DAGCombinerInfo &DCI) const {
16224 SDValue N0 = N->getOperand(0);
16225 SDValue N1 = N->getOperand(1);
16226 SDValue N2 = N->getOperand(2);
16227 SDNodeFlags Flags = N->getFlags();
16228 EVT VT = N->getValueType(0);
16229 SelectionDAG &DAG = DCI.DAG;
16230 const TargetOptions &Options = getTargetMachine().Options;
16231 unsigned Opc = N->getOpcode();
16232 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16233 bool LegalOps = !DCI.isBeforeLegalizeOps();
16234 SDLoc Loc(N);
16235
16236 if (!isOperationLegal(ISD::FMA, VT))
16237 return SDValue();
16238
16239 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16240 // since (fnmsub a b c)=-0 while c-ab=+0.
16241 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16242 return SDValue();
16243
16244 // (fma (fneg a) b c) => (fnmsub a b c)
16245 // (fnmsub (fneg a) b c) => (fma a b c)
16246 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16247 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16248
16249 // (fma a (fneg b) c) => (fnmsub a b c)
16250 // (fnmsub a (fneg b) c) => (fma a b c)
16251 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16252 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16253
16254 return SDValue();
16255}
16256
16257bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16258 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
16259 if (!Subtarget.is64BitELFABI())
16260 return false;
16261
16262 // If not a tail call then no need to proceed.
16263 if (!CI->isTailCall())
16264 return false;
16265
16266 // If sibling calls have been disabled and tail-calls aren't guaranteed
16267 // there is no reason to duplicate.
16268 auto &TM = getTargetMachine();
16269 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16270 return false;
16271
16272 // Can't tail call a function called indirectly, or if it has variadic args.
16273 const Function *Callee = CI->getCalledFunction();
16274 if (!Callee || Callee->isVarArg())
16275 return false;
16276
16277 // Make sure the callee and caller calling conventions are eligible for tco.
16278 const Function *Caller = CI->getParent()->getParent();
16279 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16280 CI->getCallingConv()))
16281 return false;
16282
16283 // If the function is local then we have a good chance at tail-calling it
16284 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16285}
16286
16287bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16288 if (!Subtarget.hasVSX())
16289 return false;
16290 if (Subtarget.hasP9Vector() && VT == MVT::f128)
16291 return true;
16292 return VT == MVT::f32 || VT == MVT::f64 ||
16293 VT == MVT::v4f32 || VT == MVT::v2f64;
16294}
16295
/// Decide whether the 'and' instruction \p AndI, whose result is compared
/// against zero, should be sunk so the mask-and-compare can be folded.
///
/// The mask is taken from operand 1 of \p AndI. For a constant mask the
/// answer is true only when the constant is at most 64 bits wide and either
/// fits entirely in the low 16 bits, or has its low 16 bits clear and fits in
/// 16 bits once shifted right by 16. Any non-constant mask yields true.
16296bool PPCTargetLowering::
16297isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16298 const Value *Mask = AndI.getOperand(1);
16299 // If the mask is suitable for andi. or andis. we should sink the and.
16300 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16301 // Can't handle constants wider than 64-bits.
16302 if (CI->getBitWidth() > 64)
16303 return false;
16304 int64_t ConstVal = CI->getZExtValue();
// First alternative: value confined to bits 0..15.
// Second alternative: bits 0..15 are zero and the shifted value fits in 16
// bits, i.e. only the next 16 bits may be set.
16305 return isUInt<16>(ConstVal) ||
16306 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16307 }
16308
16309 // For non-constant masks, we can always use the record-form and.
16310 return true;
16311}
16312
16313// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16314// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16315// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16316// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
16317// Transform (abs (sub a, b)) to (vabsd a b 1) if a & b are of type v4i32
//
// DAG combine for ISD::ABS nodes. Callers must only invoke this on an ABS
// node and only when the subtarget has P9 Altivec (both are asserted).
// Returns a PPCISD::VABSD node built from the operands of the SUB feeding the
// ABS, or an empty SDValue when the node's type is not v4i32/v8i16/v16i8, the
// operand is not a SUB, or neither pattern below matches.
16318SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16319 assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16320 assert(Subtarget.hasP9Altivec() &&
16321 "Only combine this when P9 altivec supported!");
16322 EVT VT = N->getValueType(0);
16323 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16324 return SDValue();
16325
16326 SelectionDAG &DAG = DCI.DAG;
16327 SDLoc dl(N);
16328 if (N->getOperand(0).getOpcode() == ISD::SUB) {
16329 // Even though the subtraction is on signed integers, both inputs are
16330 // known to be non-negative (as signed integers) when they are zero-extended.
16331 unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16332 unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16333 if ((SubOpcd0 == ISD::ZERO_EXTEND ||
// NOTE(review): listing lines 16334-16336 are absent from this copy, so the
// remainder of this condition is not visible here. Per the header comment it
// presumably also accepts zext_invec and checks SubOpcd1 - confirm against
// the upstream file.
16337 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16338 N->getOperand(0)->getOperand(0),
16339 N->getOperand(0)->getOperand(1),
16340 DAG.getTargetConstant(0, dl, MVT::i32));
16341 }
16342
16343 // For type v4i32, it can be optimized with xvnegsp + vabsduw
// Only done when the SUB has a single use; the trailing constant 1 (versus 0
// above) distinguishes this form of VABSD.
16344 if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16345 N->getOperand(0).hasOneUse()) {
16346 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16347 N->getOperand(0)->getOperand(0),
16348 N->getOperand(0)->getOperand(1),
16349 DAG.getTargetConstant(1, dl, MVT::i32));
16350 }
16351 }
16352
16353 return SDValue();
16354}
16355
16356// For type v4i32/v8i16/v16i8, transform
16357// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16358// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16359// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16360// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
//
// DAG combine for ISD::VSELECT nodes. Callers must only invoke this on a
// VSELECT node and only when the subtarget has P9 Altivec (both are
// asserted). Returns a PPCISD::VABSD node when the select chooses between the
// two opposite subtractions of the operands of an unsigned comparison;
// otherwise returns an empty SDValue.
16361SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16362 DAGCombinerInfo &DCI) const {
16363 assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16364 assert(Subtarget.hasP9Altivec() &&
16365 "Only combine this when P9 altivec supported!");
16366
16367 SelectionDAG &DAG = DCI.DAG;
16368 SDLoc dl(N);
16369 SDValue Cond = N->getOperand(0);
16370 SDValue TrueOpnd = N->getOperand(1);
16371 SDValue FalseOpnd = N->getOperand(2);
16372 EVT VT = N->getOperand(1).getValueType();
16373
// The pattern needs a SETCC condition selecting between two SUB nodes.
16374 if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16375 FalseOpnd.getOpcode() != ISD::SUB)
16376 return SDValue();
16377
16378 // ABSD only available for type v4i32/v8i16/v16i8
16379 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16380 return SDValue();
16381
16382 // At least to save one more dependent computation
// i.e. give up only when the condition and both subtractions all have other
// users.
16383 if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
16384 return SDValue();
16385
16386 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16387
16388 // Can only handle unsigned comparison here
16389 switch (CC) {
16390 default:
16391 return SDValue();
16392 case ISD::SETUGT:
16393 case ISD::SETUGE:
16394 break;
16395 case ISD::SETULT:
16396 case ISD::SETULE:
// NOTE(review): listing line 16397 is absent from this copy. Given the header
// comment, it presumably swaps TrueOpnd and FalseOpnd so the check below can
// assume the "greater" orientation - confirm against the upstream file.
16398 break;
16399 }
16400
16401 SDValue CmpOpnd1 = Cond.getOperand(0);
16402 SDValue CmpOpnd2 = Cond.getOperand(1);
16403
16404 // SETCC CmpOpnd1 CmpOpnd2 cond
16405 // TrueOpnd = CmpOpnd1 - CmpOpnd2
16406 // FalseOpnd = CmpOpnd2 - CmpOpnd1
16407 if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
16408 TrueOpnd.getOperand(1) == CmpOpnd2 &&
16409 FalseOpnd.getOperand(0) == CmpOpnd2 &&
16410 FalseOpnd.getOperand(1) == CmpOpnd1) {
16411 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
// NOTE(review): listing line 16412, which supplies the two value operands of
// the VABSD node, is absent from this copy - confirm against the upstream
// file.
16413 DAG.getTargetConstant(0, dl, MVT::i32));
16414 }
16415
16416 return SDValue();
16417}
unsigned const MachineRegisterInfo * MRI
if(Register::isVirtualRegister(Reg)) return MRI -> getRegClass(Reg) ->hasSuperClassEq(&AArch64::GPR64RegClass)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
#define Success
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
static const unsigned PerfectShuffleTable[6561+1]
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Function Alias Analysis Results
Atomic ordering constants.
basic Basic Alias true
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< ShadowStackGC > C("shadow-stack", "Very portable GC for uncooperative code generators")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:26
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition Compiler.h:280
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition Debug.h:122
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
uint64_t Align
uint64_t Offset
uint64_t Addr
uint32_t Index
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
#define RegName(no)
lazy value info
#define F(x, y, z)
Definition MD5.cpp:56
#define I(x, y, z)
Definition MD5.cpp:59
#define G(x, y, z)
Definition MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
unsigned Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
static bool isConstantOrUndef(const SDValue Op)
Module.h This file contains the declarations for the Module class.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static bool isFunctionGlobalAddress(SDValue Callee)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static cl::opt< bool > EnableSoftFP128("enable-soft-fp128", cl::desc("temp option to enable soft fp128"), cl::Hidden)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG)
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static Instruction * callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64)
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSplat(ArrayRef< Value * > VL)
Shadow Stack GC Lowering
static bool Enabled
Definition Statistic.cpp:50
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:169
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:197
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:458
static bool is64Bit(const char *name)
xray Insert XRay ops
bool isPosZero() const
Definition APFloat.h:1217
Class for arbitrary precision integers.
Definition APInt.h:70
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition APInt.h:567
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:469
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition APInt.h:667
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition APInt.h:655
This class represents an incoming formal argument to a Function.
Definition Argument.h:29
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:59
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:107
The address of a basic block.
Definition Constants.h:851
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:273
This is the shared class of boolean and integer constants.
Definition Constants.h:77
This is an important base class in LLVM.
Definition Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:111
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:240
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
A debug info location.
Definition DebugLoc.h:33
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:65
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:685
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.h:355
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:682
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:228
const Function & getFunction() const
Definition Function.h:135
arg_iterator arg_begin()
Definition Function.h:762
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:298
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.h:345
const GlobalObject * getBaseObject() const
Definition Globals.cpp:467
StringRef getSection() const
Definition Globals.cpp:162
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
bool hasComdat() const
const BasicBlock * getParent() const
Definition Instruction.h:94
bool hasAtomicLoad() const
Return true if this atomic instruction loads from memory.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition LoopInfo.h:143
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition LoopInfo.h:96
block_iterator block_end() const
Definition LoopInfo.h:177
block_iterator block_begin() const
Definition LoopInfo.h:176
Context object for machine code objects.
Definition MCContext.h:68
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:22
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition MCExpr.h:381
Machine Value Type.
static mvt_range fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
@ INVALID_SIMPLE_VALUE_TYPE
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static mvt_range integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static mvt_range fp_valuetypes()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
uint64_t getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in physical ...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
const SDValue & getBasePtr() const
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
unsigned getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
unsigned getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
bool useLongCalls() const
bool hasFRSQRTE() const
bool is32BitELFABI() const
bool hasMMA() const
unsigned descriptorTOCAnchorOffset() const
bool hasFPCVT() const
bool isAIXABI() const
bool useSoftFloat() const
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
bool hasAltivec() const
bool allowsUnalignedFPAccess() const
const PPCFrameLowering * getFrameLowering() const override
bool needsSwapsForVSXMemOps() const
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool needsTwoConstNR() const
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
bool hasFSQRT() const
bool hasP9Vector() const
bool hasFRE() const
bool hasFRSQRTES() const
MCRegister getEnvironmentPointerRegister() const
const PPCInstrInfo * getInstrInfo() const override
bool hasFPU() const
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
bool hasRecipPrec() const
bool hasSTFIWX() const
bool isSVR4ABI() const
bool hasInvariantFunctionDescriptors() const
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
POPCNTDKind hasPOPCNTD() const
bool hasEFPU2() const
bool hasPrefixInstrs() const
bool hasPartwordAtomics() const
bool hasSPE() const
bool hasLFIWAX() const
bool isLittleEndian() const
bool hasFCPSGN() const
bool isTargetLinux() const
bool hasP9Altivec() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
bool is64BitELFABI() const
bool hasFPRND() const
bool isELFv2ABI() const
bool hasP8Vector() const
bool pairedVectorMemops() const
const PPCTargetMachine & getTargetMachine() const
bool isPredictableSelectIsExpensive() const
bool enableMachineScheduler() const override
Scheduling customization.
bool hasFRES() const
bool isISA3_1() const
bool hasLDBRX() const
const PPCRegisterInfo * getRegisterInfo() const override
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
bool isISA3_0() const
bool hasVSX() const
bool hasDirectMove() const
bool hasP8Altivec() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
unsigned getStackProbeSize(MachineFunction &MF) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if the target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
Instruction * emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: "sub y, (xor x, -1)" and "add (add x, 1), y". The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align=1, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
bool hasInlineStackProbe(MachineFunction &MF) const override
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=None) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
Instruction * emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:71
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
const SDNodeFlags getFlags() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offset=0, unsigned TargetFlags=0)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0)
Test whether V has a splatted value for all the demanded elements.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition TypeSize.h:130
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:57
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:511
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition StringRef.h:160
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:152
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetOptions Options
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:80
static TypeSize Fixed(ScalarTy MinVal)
Definition TypeSize.h:418
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:235
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:148
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
static Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:180
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:151
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:202
Value * getOperand(unsigned i) const
Definition User.h:169
unsigned getNumOperands() const
Definition User.h:191
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:246
User * user_back()
Definition Value.h:410
Implementation for an ilist node.
Definition ilist_node.h:39
self_iterator getIterator()
Definition ilist_node.h:81
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:80
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:42
@ C
C - The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:651
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:229
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition ISDOpcodes.h:954
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition ISDOpcodes.h:950
@ TargetConstantPool
Definition ISDOpcodes.h:161
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:456
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:44
@ FLT_ROUNDS_
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to ...
Definition ISDOpcodes.h:772
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:140
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:243
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:615
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition ISDOpcodes.h:983
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:262
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:232
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition ISDOpcodes.h:863
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:681
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:460
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:192
@ GlobalAddress
Definition ISDOpcodes.h:71
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:688
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:513
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:371
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:589
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:248
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition ISDOpcodes.h:800
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:790
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:222
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:392
@ GlobalTLSAddress
Definition ISDOpcodes.h:72
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:675
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:430
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:558
@ TargetExternalSymbol
Definition ISDOpcodes.h:162
@ BR
Control flow instructions. These all have token chains.
Definition ISDOpcodes.h:879
@ TargetJumpTable
Definition ISDOpcodes.h:160
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition ISDOpcodes.h:857
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition ISDOpcodes.h:808
@ BR_CC
BR_CC - Conditional branch.
Definition ISDOpcodes.h:905
@ BR_JT
BR_JT - Jumptable branch.
Definition ISDOpcodes.h:888
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:329
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:628
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:215
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition ISDOpcodes.h:979
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:157
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:570
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:606
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:550
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:541
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition ISDOpcodes.h:847
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:505
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:678
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:643
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition ISDOpcodes.h:840
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition ISDOpcodes.h:873
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:696
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:575
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:775
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:637
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:429
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:122
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:87
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:423
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:445
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:422
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:734
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:450
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:581
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:177
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:272
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:381
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:494
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:763
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:729
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:403
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:134
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:684
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition ISDOpcodes.h:974
@ BRCOND
BRCOND - Conditional branch.
Definition ISDOpcodes.h:898
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:664
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:59
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:470
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:320
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition ISDOpcodes.h:968
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:185
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:158
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:485
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
@ VecShuffle
Definition NVPTX.h:88
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:143
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:107
@ MO_GOT_FLAG
MO_GOT_FLAG - If this bit is set the symbol reference is to be computed via the GOT.
Definition PPC.h:112
@ MO_TPREL_HA
Definition PPC.h:153
@ MO_PLT
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition PPC.h:99
@ MO_TLS
Definition PPC.h:162
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set the symbol reference is relative to TLS Initial Exec model.
Definition PPC.h:124
@ MO_TPREL_LO
Definition PPC.h:152
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:149
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:138
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:133
@ MO_HA
Definition PPC.h:150
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:103
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ VABSD
An SDNode for Power9 vector absolute value difference.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ FP_TO_UINT_IN_VSR
Floating-point-to-integer conversion instructions.
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ RET_FLAG
Return with a flag operand, matched by 'blr'.
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ XSMAXCDP
XSMAXCDP, XSMINCDP - C-type min/max instructions.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
@ XMC_PR
Program Code.
Definition XCOFF.h:40
@ XTY_ER
External reference.
Definition XCOFF.h:176
constexpr double e
Definition MathExtras.h:58
This class represents lattice values for constants.
static bool isIndirectCall(const MachineInstr &MI)
constexpr bool isUInt< 16 >(uint64_t x)
Definition MathExtras.h:409
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:148
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:497
constexpr bool isInt< 16 >(int64_t x)
Definition MathExtras.h:371
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition MathExtras.h:664
unsigned M1(unsigned Val)
Definition VE.h:372
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1505
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition MathExtras.h:696
const NoneType None
Definition None.h:23
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:492
bool convertToNonDenormSingle(APInt &ArgAPInt)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition STLExtras.h:1341
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition MathExtras.h:157
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:132
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition ArrayRef.h:458
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition Error.cpp:140
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if the value of the given node can be accurately represented as a sign ...
@ Z
zlib style compression
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:158
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1581
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition MathExtras.h:673
unsigned M0(unsigned Val)
Definition VE.h:371
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:762
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr unsigned BitWidth
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:221
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:778
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:944
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:163
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:190
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition APFloat.cpp:178
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:355
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:121
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:131
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:333
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:345
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:278
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:341
std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:146
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:285
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:290
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:141
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:136
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:66
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:119
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueTypes that has been interned by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs